diff --git a/Cargo.toml b/Cargo.toml
index 36bd4a6a03..43b6dc1ea5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,7 +8,6 @@ members = [
     "crates/std_detect",
     "crates/stdarch-gen-arm",
     "crates/stdarch-gen-loongarch",
-    "crates/stdarch-gen",
     "crates/stdarch-gen2",
     "crates/intrinsic-test",
     "examples/"
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index d2a906604b..08f7aecdb6 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -1,199 +1,221 @@
 // This code is automatically generated. DO NOT MODIFY.
 //
-// Instead, modify `crates/stdarch-gen-arm/neon.spec` and run the following command to re-generate this file:
+// Instead, modify `crates/stdarch-gen2/spec/` and run the following command to re-generate this file:
 //
 // ```
-// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-arm -- crates/stdarch-gen-arm/neon.spec
+// cargo run --bin=stdarch-gen2 -- crates/stdarch-gen2/spec
 // ```
-use super::*;
+#![allow(improper_ctypes)]
+
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)
+use super::*;
+
+#[doc = "Signed Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3s.v16i8")]
-        fn veor3q_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
-    }
-    veor3q_s8_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal))]
+pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+    let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let e: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let f: int8x8_t = vabd_s8(d, e);
+    let f: uint8x8_t = simd_cast(f);
+    simd_add(a, simd_cast(f))
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s16)
+#[doc = "Signed Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3s.v8i16")]
-        fn veor3q_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    veor3q_s16_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal))]
+pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let e: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
+    let f: int16x4_t = vabd_s16(d, e);
+    let f: uint16x4_t = simd_cast(f);
+    simd_add(a, simd_cast(f))
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s32)
+#[doc = "Signed Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3s.v4i32")]
-        fn veor3q_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
-    }
-    veor3q_s32_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal))]
+pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let e: int32x2_t = simd_shuffle!(c, c, [2, 3]);
+    let f: int32x2_t = vabd_s32(d, e);
+    let f: uint32x2_t = simd_cast(f);
+    simd_add(a, simd_cast(f))
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s64)
+#[doc = "Unsigned Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3s.v2i64")]
-        fn veor3q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
-    }
-    veor3q_s64_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal))]
+pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
+    let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let e: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let f: uint8x8_t = vabd_u8(d, e);
+    simd_add(a, simd_cast(f))
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u8)
+#[doc = "Unsigned Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3u.v16i8")]
-        fn veor3q_u8_(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t;
-    }
-    veor3q_u8_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal))]
+pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+    let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let e: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
+    let f: uint16x4_t = vabd_u16(d, e);
+    simd_add(a, simd_cast(f))
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u16)
+#[doc = "Unsigned Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3u.v8i16")]
-        fn veor3q_u16_(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t;
-    }
-    veor3q_u16_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal))]
+pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+    let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let e: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
+    let f: uint32x2_t = vabd_u32(d, e);
+    simd_add(a, simd_cast(f))
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u32)
+#[doc = "Absolute difference between the arguments of Floating"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3u.v4i32")]
-        fn veor3q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fabd.v1f64"
+        )]
+        fn _vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
-    veor3q_u32_(a, b, c)
+    _vabd_f64(a, b)
 }
 
-/// Three-way exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u64)
+#[doc = "Absolute difference between the arguments of Floating"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(eor3))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.eor3u.v2i64")]
-        fn veor3q_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fabd.v2f64"
+        )]
+        fn _vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
-    veor3q_u64_(a, b, c)
+    _vabdq_f64(a, b)
 }
 
-/// Absolute difference between the arguments of Floating
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f64)
+#[doc = "Floating-point absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fabd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fabd.v1f64")]
-        fn vabd_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    vabd_f64_(a, b)
+#[cfg_attr(test, assert_instr(fabd))]
+pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 {
+    simd_extract!(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
 }
 
-/// Absolute difference between the arguments of Floating
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f64)
+#[doc = "Floating-point absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabds_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fabd))]
+pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 {
+    simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+}
+
+#[doc = "Signed Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fabd.v2f64")]
-        fn vabdq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    vabdq_f64_(a, b)
+#[cfg_attr(test, assert_instr(sabdl))]
+pub unsafe fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let e: uint16x4_t = simd_cast(vabd_s16(c, d));
+    simd_cast(e)
 }
 
-/// Floating-point absolute difference
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabds_f32)
+#[doc = "Signed Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fabd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabds_f32(a: f32, b: f32) -> f32 {
-    simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+#[cfg_attr(test, assert_instr(sabdl))]
+pub unsafe fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let e: uint32x2_t = simd_cast(vabd_s32(c, d));
+    simd_cast(e)
 }
 
-/// Floating-point absolute difference
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdd_f64)
+#[doc = "Signed Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fabd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabdd_f64(a: f64, b: f64) -> f64 {
-    simd_extract!(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
+#[cfg_attr(test, assert_instr(sabdl))]
+pub unsafe fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let e: uint8x8_t = simd_cast(vabd_s8(c, d));
+    simd_cast(e)
 }
 
-/// Unsigned Absolute difference Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)
+#[doc = "Unsigned Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(uabdl))]
@@ -204,9 +226,10 @@ pub unsafe fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
     simd_cast(vabd_u8(c, d))
 }
 
-/// Unsigned Absolute difference Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)
+#[doc = "Unsigned Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(uabdl))]
@@ -217,9 +240,10 @@ pub unsafe fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     simd_cast(vabd_u16(c, d))
 }
 
-/// Unsigned Absolute difference Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)
+#[doc = "Unsigned Absolute difference Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(uabdl))]
@@ -230,1131 +254,1457 @@ pub unsafe fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     simd_cast(vabd_u32(c, d))
 }
 
-/// Signed Absolute difference Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)
+#[doc = "Floating-point absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sabdl))]
+#[cfg_attr(test, assert_instr(fabs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let e: uint8x8_t = simd_cast(vabd_s8(c, d));
-    simd_cast(e)
+pub unsafe fn vabs_f64(a: float64x1_t) -> float64x1_t {
+    simd_fabs(a)
 }
 
-/// Signed Absolute difference Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)
+#[doc = "Floating-point absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sabdl))]
+#[cfg_attr(test, assert_instr(fabs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let e: uint16x4_t = simd_cast(vabd_s16(c, d));
-    simd_cast(e)
+pub unsafe fn vabsq_f64(a: float64x2_t) -> float64x2_t {
+    simd_fabs(a)
 }
 
-/// Signed Absolute difference Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sabdl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let e: uint32x2_t = simd_cast(vabd_s32(c, d));
-    simd_cast(e)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
+    a.wrapping_add(b)
 }
 
-/// Compare bitwise Equal (vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u64)
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
+    a.wrapping_add(b)
 }
 
-/// Compare bitwise Equal (vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u64)
+#[doc = "Signed Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(saddlv))]
+pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlv.i32.v4i16"
+        )]
+        fn _vaddlv_s16(a: int16x4_t) -> i32;
+    }
+    _vaddlv_s16(a)
 }
 
-/// Compare bitwise Equal (vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s64)
+#[doc = "Signed Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(saddlv))]
+pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlv.i32.v8i16"
+        )]
+        fn _vaddlvq_s16(a: int16x8_t) -> i32;
+    }
+    _vaddlvq_s16(a)
 }
 
-/// Compare bitwise Equal (vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s64)
+#[doc = "Signed Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(saddlv))]
+pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlv.i64.v4i32"
+        )]
+        fn _vaddlvq_s32(a: int32x4_t) -> i64;
+    }
+    _vaddlvq_s32(a)
 }
 
-/// Compare bitwise Equal (vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_p64)
+#[doc = "Signed Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(saddlp))]
+pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlv.i64.v2i32"
+        )]
+        fn _vaddlv_s32(a: int32x2_t) -> i64;
+    }
+    _vaddlv_s32(a)
 }
 
-/// Compare bitwise Equal (vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_p64)
+#[doc = "Unsigned Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(uaddlv))]
+pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlv.i32.v4i16"
+        )]
+        fn _vaddlv_u16(a: int16x4_t) -> i32;
+    }
+    _vaddlv_u16(a.as_signed()).as_unsigned()
 }
 
-/// Floating-point compare equal
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f64)
+#[doc = "Unsigned Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(uaddlv))]
+pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlv.i32.v8i16"
+        )]
+        fn _vaddlvq_u16(a: int16x8_t) -> i32;
+    }
+    _vaddlvq_u16(a.as_signed()).as_unsigned()
 }
 
-/// Floating-point compare equal
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f64)
+#[doc = "Unsigned Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
-    simd_eq(a, b)
+#[cfg_attr(test, assert_instr(uaddlv))]
+pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlv.i64.v4i32"
+        )]
+        fn _vaddlvq_u32(a: int32x4_t) -> i64;
+    }
+    _vaddlvq_u32(a.as_signed()).as_unsigned()
 }
 
-/// Compare bitwise equal
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_s64)
+#[doc = "Unsigned Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 {
-    transmute(vceq_s64(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(uaddlp))]
+pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlv.i64.v2i32"
+        )]
+        fn _vaddlv_u32(a: int32x2_t) -> i64;
+    }
+    _vaddlv_u32(a.as_signed()).as_unsigned()
 }
 
-/// Compare bitwise equal
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_u64)
+#[doc = "Floating-point add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 {
-    transmute(vceq_u64(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vaddv_f32(a: float32x2_t) -> f32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddv.f32.v2f32"
+        )]
+        fn _vaddv_f32(a: float32x2_t) -> f32;
+    }
+    _vaddv_f32(a)
 }
 
-/// Floating-point compare equal
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqs_f32)
+#[doc = "Floating-point add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 {
-    simd_extract!(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0)
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vaddvq_f32(a: float32x4_t) -> f32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddv.f32.v4f32"
+        )]
+        fn _vaddvq_f32(a: float32x4_t) -> f32;
+    }
+    _vaddvq_f32(a)
 }
 
-/// Floating-point compare equal
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_f64)
+#[doc = "Floating-point add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqd_f64(a: f64, b: f64) -> u64 {
-    simd_extract!(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0)
-}
-
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s8)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t {
-    let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vaddvq_f64(a: float64x2_t) -> f64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddv.f64.v2f64"
+        )]
+        fn _vaddvq_f64(a: float64x2_t) -> f64;
+    }
+    _vaddvq_f64(a)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s8)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t {
-    let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v16i8"
+        )]
+        fn _vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _vbcaxq_s8(a, b, c)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s16)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t {
-    let b: i16x4 = i16x4::new(0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v8i16"
+        )]
+        fn _vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _vbcaxq_s16(a, b, c)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s16)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t {
-    let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v4i32"
+        )]
+        fn _vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _vbcaxq_s32(a, b, c)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s32)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t {
-    let b: i32x2 = i32x2::new(0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v2i64"
+        )]
+        fn _vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vbcaxq_s64(a, b, c)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s32)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t {
-    let b: i32x4 = i32x4::new(0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v16i8"
+        )]
+        fn _vbcaxq_u8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _vbcaxq_u8(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s64)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t {
-    let b: i64x1 = i64x1::new(0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v8i16"
+        )]
+        fn _vbcaxq_u16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _vbcaxq_u16(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s64)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t {
-    let b: i64x2 = i64x2::new(0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v4i32"
+        )]
+        fn _vbcaxq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _vbcaxq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_p8)
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_p8(a: poly8x8_t) -> uint8x8_t {
-    let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v2i64"
+        )]
+        fn _vbcaxq_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vbcaxq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_p8)
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_p8(a: poly8x16_t) -> uint8x16_t {
-    let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32"
+        )]
+        fn _vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vcadd_rot270_f32(a, b)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_p64)
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t {
-    let b: i64x1 = i64x1::new(0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32"
+        )]
+        fn _vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vcaddq_rot270_f32(a, b)
 }
 
-/// Signed compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_p64)
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t {
-    let b: i64x2 = i64x2::new(0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64"
+        )]
+        fn _vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vcaddq_rot270_f64(a, b)
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u8)
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t {
-    let b: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32"
+        )]
+        fn _vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vcadd_rot90_f32(a, b)
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u8)
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t {
-    let b: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32"
+        )]
+        fn _vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vcaddq_rot90_f32(a, b)
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u16)
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t {
-    let b: u16x4 = u16x4::new(0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64"
+        )]
+        fn _vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vcaddq_rot90_f64(a, b)
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u16)
+#[doc = "Floating-point absolute compare greater than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t {
-    let b: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facge.v1i64.v1f64"
+        )]
+        fn _vcage_f64(a: float64x1_t, b: float64x1_t) -> int64x1_t;
+    }
+    _vcage_f64(a, b).as_unsigned()
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u32)
+#[doc = "Floating-point absolute compare greater than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_u32(a: uint32x2_t) -> uint32x2_t {
-    let b: u32x2 = u32x2::new(0, 0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facge.v2i64.v2f64"
+        )]
+        fn _vcageq_f64(a: float64x2_t, b: float64x2_t) -> int64x2_t;
+    }
+    _vcageq_f64(a, b).as_unsigned()
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u32)
+#[doc = "Floating-point absolute compare greater than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaged_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t {
-    let b: u32x4 = u32x4::new(0, 0, 0, 0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcaged_f64(a: f64, b: f64) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facge.i64.f64"
+        )]
+        fn _vcaged_f64(a: f64, b: f64) -> i64;
+    }
+    _vcaged_f64(a, b).as_unsigned()
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u64)
+#[doc = "Floating-point absolute compare greater than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcages_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t {
-    let b: u64x1 = u64x1::new(0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcages_f32(a: f32, b: f32) -> u32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facge.i32.f32"
+        )]
+        fn _vcages_f32(a: f32, b: f32) -> i32;
+    }
+    _vcages_f32(a, b).as_unsigned()
 }
 
-/// Unsigned compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u64)
+#[doc = "Floating-point absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmeq))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t {
-    let b: u64x2 = u64x2::new(0, 0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64"
+        )]
+        fn _vcagt_f64(a: float64x1_t, b: float64x1_t) -> int64x1_t;
+    }
+    _vcagt_f64(a, b).as_unsigned()
 }
 
-/// Floating-point compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f32)
+#[doc = "Floating-point absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmeq))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_f32(a: float32x2_t) -> uint32x2_t {
-    let b: f32x2 = f32x2::new(0.0, 0.0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64"
+        )]
+        fn _vcagtq_f64(a: float64x2_t, b: float64x2_t) -> int64x2_t;
+    }
+    _vcagtq_f64(a, b).as_unsigned()
 }
 
-/// Floating-point compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_f32)
+#[doc = "Floating-point absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmeq))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_f32(a: float32x4_t) -> uint32x4_t {
-    let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcagtd_f64(a: f64, b: f64) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facgt.i64.f64"
+        )]
+        fn _vcagtd_f64(a: f64, b: f64) -> i64;
+    }
+    _vcagtd_f64(a, b).as_unsigned()
 }
 
-/// Floating-point compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f64)
+#[doc = "Floating-point absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagts_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmeq))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqz_f64(a: float64x1_t) -> uint64x1_t {
-    let b: f64 = 0.0;
-    simd_eq(a, transmute(b))
+pub unsafe fn vcagts_f32(a: f32, b: f32) -> u32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.facgt.i32.f32"
+        )]
+        fn _vcagts_f32(a: f32, b: f32) -> i32;
+    }
+    _vcagts_f32(a, b).as_unsigned()
 }
 
-/// Floating-point compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_f64)
+#[doc = "Floating-point absolute compare less than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmeq))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzq_f64(a: float64x2_t) -> uint64x2_t {
-    let b: f64x2 = f64x2::new(0.0, 0.0);
-    simd_eq(a, transmute(b))
+pub unsafe fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+    vcage_f64(b, a)
 }
 
-/// Compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_s64)
+#[doc = "Floating-point absolute compare less than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmp))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzd_s64(a: i64) -> u64 {
-    transmute(vceqz_s64(transmute(a)))
+pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+    vcageq_f64(b, a)
 }
 
-/// Compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_u64)
+#[doc = "Floating-point absolute compare less than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaled_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmp))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzd_u64(a: u64) -> u64 {
-    transmute(vceqz_u64(transmute(a)))
+pub unsafe fn vcaled_f64(a: f64, b: f64) -> u64 {
+    vcaged_f64(b, a)
 }
 
-/// Floating-point compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)
+#[doc = "Floating-point absolute compare less than or equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcales_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmp))]
+#[cfg_attr(test, assert_instr(facge))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzs_f32(a: f32) -> u32 {
-    simd_extract!(vceqz_f32(vdup_n_f32(a)), 0)
+pub unsafe fn vcales_f32(a: f32, b: f32) -> u32 {
+    vcages_f32(b, a)
 }
 
-/// Floating-point compare bitwise equal to zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_f64)
+#[doc = "Floating-point absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcmp))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vceqzd_f64(a: f64) -> u64 {
-    simd_extract!(vceqz_f64(vdup_n_f64(a)), 0)
+pub unsafe fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+    vcagt_f64(b, a)
 }
 
-/// Signed compare bitwise Test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)
+#[doc = "Floating-point absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
-    let c: int64x1_t = simd_and(a, b);
-    let d: i64x1 = i64x1::new(0);
-    simd_ne(c, transmute(d))
+pub unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+    vcagtq_f64(b, a)
 }
 
-/// Signed compare bitwise Test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)
+#[doc = "Floating-point absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
-    let c: int64x2_t = simd_and(a, b);
-    let d: i64x2 = i64x2::new(0, 0);
-    simd_ne(c, transmute(d))
+pub unsafe fn vcaltd_f64(a: f64, b: f64) -> u64 {
+    vcagtd_f64(b, a)
 }
 
-/// Signed compare bitwise Test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)
+#[doc = "Floating-point absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalts_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
+#[cfg_attr(test, assert_instr(facgt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
-    let c: poly64x1_t = simd_and(a, b);
-    let d: i64x1 = i64x1::new(0);
-    simd_ne(c, transmute(d))
+pub unsafe fn vcalts_f32(a: f32, b: f32) -> u32 {
+    vcagts_f32(b, a)
 }
 
-/// Signed compare bitwise Test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)
+#[doc = "Floating-point compare equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
+#[cfg_attr(test, assert_instr(fcmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
-    let c: poly64x2_t = simd_and(a, b);
-    let d: i64x2 = i64x2::new(0, 0);
-    simd_ne(c, transmute(d))
+pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
+    simd_eq(a, b)
 }
 
-/// Unsigned compare bitwise Test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)
+#[doc = "Floating-point compare equal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
+#[cfg_attr(test, assert_instr(fcmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    let c: uint64x1_t = simd_and(a, b);
-    let d: u64x1 = u64x1::new(0);
-    simd_ne(c, transmute(d))
+pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
+    simd_eq(a, b)
 }
 
-/// Unsigned compare bitwise Test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)
+#[doc = "Compare bitwise Equal (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(cmtst))]
+#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    let c: uint64x2_t = simd_and(a, b);
-    let d: u64x2 = u64x2::new(0, 0);
-    simd_ne(c, transmute(d))
+pub unsafe fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+    simd_eq(a, b)
 }
 
-/// Compare bitwise test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)
+#[doc = "Compare bitwise Equal (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tst))]
+#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 {
-    transmute(vtst_s64(transmute(a), transmute(b)))
+pub unsafe fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+    simd_eq(a, b)
 }
 
-/// Compare bitwise test bits nonzero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)
+#[doc = "Compare bitwise Equal (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tst))]
+#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtstd_u64(a: u64, b: u64) -> u64 {
-    transmute(vtst_u64(transmute(a), transmute(b)))
+pub unsafe fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_eq(a, b)
 }
 
-/// Signed saturating accumulate of unsigned value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)
+#[doc = "Compare bitwise Equal (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
+#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqadds_s32(a: i32, b: u32) -> i32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.suqadd.i32")]
-        fn vuqadds_s32_(a: i32, b: u32) -> i32;
-    }
-    vuqadds_s32_(a, b)
+pub unsafe fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_eq(a, b)
 }
 
-/// Signed saturating accumulate of unsigned value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)
+#[doc = "Compare bitwise Equal (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
+#[cfg_attr(test, assert_instr(cmeq))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqaddd_s64(a: i64, b: u64) -> i64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.suqadd.i64")]
-        fn vuqaddd_s64_(a: i64, b: u64) ->
i64; - } - vuqaddd_s64_(a, b) +pub unsafe fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t { + simd_eq(a, b) } -/// Signed saturating accumulate of unsigned value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuqaddb_s8(a: i8, b: u8) -> i8 { - simd_extract!(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) +pub unsafe fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { + simd_eq(a, b) } -/// Signed saturating accumulate of unsigned value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16) +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(suqadd))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuqaddh_s16(a: i16, b: u16) -> i16 { - simd_extract!(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) +pub unsafe fn vceqd_f64(a: f64, b: f64) -> u64 { + simd_extract!(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } -/// Floating-point absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f64) +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fabs))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabs_f64(a: float64x1_t) -> float64x1_t { - simd_fabs(a) +pub unsafe fn vceqs_f32(a: f32, b: f32) -> u32 { + simd_extract!(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } -/// Floating-point absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f64) +#[doc = "Compare bitwise equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fabs))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabsq_f64(a: float64x2_t) -> float64x2_t { - simd_fabs(a) +pub unsafe fn vceqd_s64(a: i64, b: i64) -> u64 { + transmute(vceq_s64(transmute(a), transmute(b))) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s64) +#[doc = "Compare bitwise equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, 
assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { - simd_gt(a, b) +pub unsafe fn vceqd_u64(a: u64, b: u64) -> u64 { + transmute(vceq_u64(transmute(a), transmute(b))) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s64) +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(fcmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { - simd_gt(a, b) +pub unsafe fn vceqz_f32(a: float32x2_t) -> uint32x2_t { + let b: f32x2 = f32x2::new(0.0, 0.0); + simd_eq(a, transmute(b)) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u64) +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhi))] +#[cfg_attr(test, assert_instr(fcmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_gt(a, b) +pub unsafe fn vceqzq_f32(a: float32x4_t) -> uint32x4_t { + let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); + simd_eq(a, transmute(b)) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u64) +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhi))] +#[cfg_attr(test, assert_instr(fcmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_gt(a, b) +pub unsafe fn vceqz_f64(a: float64x1_t) -> uint64x1_t { + let b: f64 = 0.0; + simd_eq(a, transmute(b)) } -/// Floating-point compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64) +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(fcmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - simd_gt(a, b) +pub unsafe fn vceqzq_f64(a: float64x2_t) -> uint64x2_t { + let b: f64x2 = f64x2::new(0.0, 0.0); + simd_eq(a, transmute(b)) } -/// Floating-point compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - simd_gt(a, b) +pub unsafe fn vceqz_s8(a: int8x8_t) -> uint8x8_t { + let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_s64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { - transmute(vcgt_s64(transmute(a), transmute(b))) +pub unsafe fn vceqzq_s8(a: int8x16_t) -> uint8x16_t { + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_u64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { - transmute(vcgt_u64(transmute(a), transmute(b))) +pub unsafe fn vceqz_s16(a: int16x4_t) -> uint16x4_t { + let b: i16x4 = i16x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Floating-point compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgts_f32) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { - simd_extract!(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +pub unsafe fn vceqzq_s16(a: int16x8_t) -> uint16x8_t { + let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Floating-point compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_f64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtd_f64(a: f64, b: f64) -> u64 { - 
simd_extract!(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +pub unsafe fn vceqz_s32(a: int32x2_t) -> uint32x2_t { + let b: i32x2 = i32x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { - simd_lt(a, b) +pub unsafe fn vceqzq_s32(a: int32x4_t) -> uint32x4_t { + let b: i32x4 = i32x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { - simd_lt(a, b) +pub unsafe fn vceqz_s64(a: int64x1_t) -> uint64x1_t { + let b: i64x1 = i64x1::new(0); + simd_eq(a, transmute(b)) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhi))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_lt(a, b) +pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t { + let b: i64x2 = i64x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhi))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_lt(a, b) +pub unsafe fn vceqz_p8(a: poly8x8_t) -> uint8x8_t { + let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Floating-point compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - simd_lt(a, b) +pub unsafe fn vceqzq_p8(a: poly8x16_t) -> uint8x16_t { + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Floating-point compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmgt))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - simd_lt(a, b) +pub unsafe fn vceqz_p64(a: poly64x1_t) -> uint64x1_t { + let b: i64x1 = i64x1::new(0); + simd_eq(a, transmute(b)) } -/// Compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_s64) +#[doc = "Signed compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { - transmute(vclt_s64(transmute(a), transmute(b))) +pub unsafe fn vceqzq_p64(a: poly64x2_t) -> uint64x2_t { + let b: i64x2 = i64x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_u64) +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 { - transmute(vclt_u64(transmute(a), transmute(b))) +pub unsafe fn vceqz_u8(a: uint8x8_t) -> uint8x8_t { + let b: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Floating-point compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32) +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { - simd_extract!(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +pub unsafe fn vceqzq_u8(a: uint8x16_t) -> uint8x16_t { + let b: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// 
Floating-point compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_f64) +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltd_f64(a: f64, b: f64) -> u64 { - simd_extract!(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +pub unsafe fn vceqz_u16(a: uint16x4_t) -> uint16x4_t { + let b: u16x4 = u16x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s64) +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { - simd_le(a, b) +pub unsafe fn vceqzq_u16(a: uint16x8_t) -> uint16x8_t { + let b: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s64) +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmeq))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { - simd_le(a, b) +pub unsafe fn vceqz_u32(a: uint32x2_t) -> uint32x2_t { + let b: u32x2 = u32x2::new(0, 0); + simd_eq(a, transmute(b)) +} + +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vceqzq_u32(a: uint32x4_t) -> uint32x4_t { + let b: u32x4 = u32x4::new(0, 0, 0, 0); + simd_eq(a, transmute(b)) +} + +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vceqz_u64(a: uint64x1_t) -> uint64x1_t { + let b: u64x1 = u64x1::new(0); + simd_eq(a, transmute(b)) +} + +#[doc = "Unsigned compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] 
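// Usage sketch (illustrative aside, not part of the patch): the vceqz*
// intrinsics above produce an all-ones lane mask where a lane equals zero and
// an all-zeros mask otherwise. The helpers vdup_n_u16 and vget_lane_u16 are
// assumed from core::arch::aarch64 on an AArch64/NEON target.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let none = vceqz_u16(vdup_n_u16(7));           // no lane is zero
//         assert_eq!(vget_lane_u16::<0>(none), 0);
//         let all = vceqz_u16(vdup_n_u16(0));            // every lane is zero
//         assert_eq!(vget_lane_u16::<0>(all), u16::MAX); // all-ones mask lane
//     }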
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vceqzq_u64(a: uint64x2_t) -> uint64x2_t { + let b: u64x2 = u64x2::new(0, 0); + simd_eq(a, transmute(b)) } -/// Compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_s64) +#[doc = "Compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { - transmute(vcge_s64(transmute(a), transmute(b))) +pub unsafe fn vceqzd_s64(a: i64) -> u64 { + transmute(vceqz_s64(transmute(a))) } -/// Compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_u64) +#[doc = "Compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { - transmute(vcge_u64(transmute(a), transmute(b))) +pub unsafe fn vceqzd_u64(a: u64) -> u64 { + transmute(vceqz_u64(transmute(a))) } -/// Floating-point compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcges_f32) +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { - simd_extract!(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +pub unsafe fn vceqzs_f32(a: f32) -> u32 { + simd_extract!(vceqz_f32(vdup_n_f32(a)), 0) } -/// Floating-point compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_f64) +#[doc = "Floating-point compare bitwise equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcged_f64(a: f64, b: f64) -> u64 { - simd_extract!(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +pub unsafe fn vceqzd_f64(a: f64) -> u64 { + simd_extract!(vceqz_f64(vdup_n_f64(a)), 0) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u64) +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhs))] 
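// Usage sketch (illustrative aside, not part of the patch): the scalar
// vceqz* variants above return a plain integer mask (u64::MAX / u32::MAX on a
// match, 0 otherwise) rather than a vector. Note that -0.0 compares equal to
// zero under IEEE 754. Assumes core::arch::aarch64 on an AArch64/NEON target.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         assert_eq!(vceqzd_s64(0), u64::MAX);
//         assert_eq!(vceqzd_s64(5), 0);
//         assert_eq!(vceqzs_f32(-0.0), u32::MAX); // -0.0 == 0.0
//     }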
+#[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_le(a, b) +pub unsafe fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_ge(a, b) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u64) +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhs))] +#[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_le(a, b) +pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_ge(a, b) } -/// Floating-point compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(cmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - simd_le(a, b) +pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_ge(a, b) } -/// Floating-point compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f64) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(cmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - simd_le(a, b) +pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_ge(a, b) } -/// Compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_s64) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmhs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { - transmute(vcle_s64(transmute(a), transmute(b))) +pub unsafe fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_ge(a, b) } -/// Compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_u64) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u64)"] +#[doc = "## Safety"] +#[doc = 
" * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmhs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { - transmute(vcle_u64(transmute(a), transmute(b))) +pub unsafe fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_ge(a, b) } -/// Floating-point compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcles_f32) +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { - simd_extract!(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) +pub unsafe fn vcged_f64(a: f64, b: f64) -> u64 { + simd_extract!(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } -/// Floating-point compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_f64) +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcges_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcled_f64(a: f64, b: f64) -> u64 { - simd_extract!(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) +pub unsafe fn vcges_f32(a: f32, b: f32) -> u32 { + simd_extract!(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s64) +#[doc = "Compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { - simd_ge(a, b) +pub unsafe fn vcged_s64(a: i64, b: i64) -> u64 { + transmute(vcge_s64(transmute(a), transmute(b))) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s64) +#[doc = "Compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmge))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { - simd_ge(a, b) +pub unsafe fn vcged_u64(a: u64, b: u64) -> u64 { + transmute(vcge_u64(transmute(a), transmute(b))) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u64) +#[doc = "Floating-point compare greater 
than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhs))] +#[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_ge(a, b) +pub unsafe fn vcgez_f32(a: float32x2_t) -> uint32x2_t { + let b: f32x2 = f32x2::new(0.0, 0.0); + simd_ge(a, transmute(b)) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u64) +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmhs))] +#[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_ge(a, b) +pub unsafe fn vcgezq_f32(a: float32x4_t) -> uint32x4_t { + let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); + simd_ge(a, transmute(b)) } -/// Floating-point compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f64) +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - simd_ge(a, b) +pub unsafe fn vcgez_f64(a: float64x1_t) -> uint64x1_t { + let b: f64 = 0.0; + simd_ge(a, transmute(b)) } -/// Floating-point compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f64) +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - simd_ge(a, b) +pub unsafe fn vcgezq_f64(a: float64x2_t) -> uint64x2_t { + let b: f64x2 = f64x2::new(0.0, 0.0); + simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s8) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1364,9 +1714,10 @@ pub unsafe fn vcgez_s8(a: int8x8_t) -> uint8x8_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s8) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1376,9 +1727,10 @@ pub unsafe fn vcgezq_s8(a: int8x16_t) -> uint8x16_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s16) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1388,9 +1740,10 @@ pub unsafe fn vcgez_s16(a: int16x4_t) -> uint16x4_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s16) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1400,9 +1753,10 @@ pub unsafe fn vcgezq_s16(a: int16x8_t) -> uint16x8_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s32) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1412,9 +1766,10 @@ pub unsafe fn vcgez_s32(a: int32x2_t) -> uint32x2_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s32) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1424,9 +1779,10 @@ pub unsafe fn vcgezq_s32(a: int32x4_t) -> uint32x4_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s64) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1436,9 +1792,10 @@ pub unsafe fn vcgez_s64(a: int64x1_t) -> uint64x1_t { simd_ge(a, transmute(b)) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s64) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmge))] @@ -1448,57 +1805,34 @@ pub unsafe fn vcgezq_s64(a: int64x2_t) -> uint64x2_t { simd_ge(a, transmute(b)) } -/// Floating-point compare greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgez_f32(a: float32x2_t) -> uint32x2_t { - let b: f32x2 = f32x2::new(0.0, 0.0); - simd_ge(a, transmute(b)) -} - -/// Floating-point compare greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgezq_f32(a: float32x4_t) -> uint32x4_t { - let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); - simd_ge(a, transmute(b)) -} - -/// Floating-point compare greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f64) +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgez_f64(a: float64x1_t) -> uint64x1_t { - let b: f64 = 0.0; - simd_ge(a, transmute(b)) +pub unsafe fn vcgezd_f64(a: f64) -> u64 { + simd_extract!(vcgez_f64(vdup_n_f64(a)), 0) } -/// Floating-point compare greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezq_f64) +#[doc = "Floating-point compare greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmge))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgezq_f64(a: float64x2_t) -> uint64x2_t { - let b: f64x2 = f64x2::new(0.0, 0.0); - simd_ge(a, transmute(b)) +pub unsafe fn vcgezs_f32(a: f32) -> u32 { + simd_extract!(vcgez_f32(vdup_n_f32(a)), 0) } -/// Compare signed greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_s64) +#[doc = "Compare signed greater than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(nop))] @@ -1507,127 +1841,130 @@ pub unsafe fn vcgezd_s64(a: i64) -> u64 { 
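// Usage sketch (illustrative aside, not part of the patch): the scalar
// compare-to-zero forms reduce to a single mask word; vcgezd_f64 treats 0.0
// as >= 0, while the signed integer form is effectively a sign test.
// Intrinsic names as in core::arch::aarch64 on an AArch64/NEON target.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         assert_eq!(vcgezd_f64(0.0), u64::MAX); // 0.0 >= 0.0
//         assert_eq!(vcgezd_f64(-1.0), 0);
//         assert_eq!(vcgezd_s64(-1), 0);         // negative => mask clear
//     }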
transmute(vcgez_s64(transmute(a))) } -/// Floating-point compare greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezs_f32) +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(fcmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgezs_f32(a: f32) -> u32 { - simd_extract!(vcgez_f32(vdup_n_f32(a)), 0) +pub unsafe fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_gt(a, b) } -/// Floating-point compare greater than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_f64) +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(fcmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgezd_f64(a: f64) -> u64 { - simd_extract!(vcgez_f64(vdup_n_f64(a)), 0) +pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_gt(a, b) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s8) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtz_s8(a: int8x8_t) -> uint8x8_t { - let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_gt(a, b) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s8) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzq_s8(a: int8x16_t) -> uint8x16_t { - let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_gt(a, b) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s16) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmhi))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub 
unsafe fn vcgtz_s16(a: int16x4_t) -> uint16x4_t { - let b: i16x4 = i16x4::new(0, 0, 0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_gt(a, b) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s16) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmhi))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzq_s16(a: int16x8_t) -> uint16x8_t { - let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_gt(a, b) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s32) +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtz_s32(a: int32x2_t) -> uint32x2_t { - let b: i32x2 = i32x2::new(0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgtd_f64(a: f64, b: f64) -> u64 { + simd_extract!(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s32) +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgts_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzq_s32(a: int32x4_t) -> uint32x4_t { - let b: i32x4 = i32x4::new(0, 0, 0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgts_f32(a: f32, b: f32) -> u32 { + simd_extract!(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s64) +#[doc = "Compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtz_s64(a: int64x1_t) -> uint64x1_t { - let b: i64x1 = i64x1::new(0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgtd_s64(a: i64, b: i64) -> u64 { + transmute(vcgt_s64(transmute(a), transmute(b))) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s64) +#[doc = "Compare greater than"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmgt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t { - let b: i64x2 = i64x2::new(0, 0); - simd_gt(a, transmute(b)) +pub unsafe fn vcgtd_u64(a: u64, b: u64) -> u64 { + transmute(vcgt_u64(transmute(a), transmute(b))) } -/// Floating-point compare greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32) +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmgt))] @@ -1637,9 +1974,10 @@ pub unsafe fn vcgtz_f32(a: float32x2_t) -> uint32x2_t { simd_gt(a, transmute(b)) } -/// Floating-point compare greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f32) +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmgt))] @@ -1649,9 +1987,10 @@ pub unsafe fn vcgtzq_f32(a: float32x4_t) -> uint32x4_t { simd_gt(a, transmute(b)) } -/// Floating-point compare greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f64) +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmgt))] @@ -1661,9 +2000,10 @@ pub unsafe fn vcgtz_f64(a: float64x1_t) -> uint64x1_t { simd_gt(a, transmute(b)) } -/// Floating-point compare greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f64) +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmgt))] @@ -1673,1332 +2013,1834 @@ pub unsafe fn vcgtzq_f64(a: float64x2_t) -> uint64x2_t { simd_gt(a, transmute(b)) } -/// Compare signed greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_s64) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzd_s64(a: i64) -> u64 { - transmute(vcgtz_s64(transmute(a))) +pub unsafe fn vcgtz_s8(a: int8x8_t) -> uint8x8_t { + let b: i8x8 = 
i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_gt(a, transmute(b)) } -/// Floating-point compare greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzs_f32) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzs_f32(a: f32) -> u32 { - simd_extract!(vcgtz_f32(vdup_n_f32(a)), 0) +pub unsafe fn vcgtzq_s8(a: int8x16_t) -> uint8x16_t { + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_gt(a, transmute(b)) } -/// Floating-point compare greater than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_f64) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcgtzd_f64(a: f64) -> u64 { - simd_extract!(vcgtz_f64(vdup_n_f64(a)), 0) +pub unsafe fn vcgtz_s16(a: int16x4_t) -> uint16x4_t { + let b: i16x4 = i16x4::new(0, 0, 0, 0); + simd_gt(a, transmute(b)) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s8) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclez_s8(a: int8x8_t) -> uint8x8_t { - let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtzq_s16(a: int16x8_t) -> uint16x8_t { + let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_gt(a, transmute(b)) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s8) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezq_s8(a: int8x16_t) -> uint8x16_t { - let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtz_s32(a: int32x2_t) -> uint32x2_t { + let b: i32x2 = i32x2::new(0, 0); + simd_gt(a, transmute(b)) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s16) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclez_s16(a: int16x4_t) -> uint16x4_t { - let b: i16x4 = i16x4::new(0, 0, 0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtzq_s32(a: int32x4_t) -> uint32x4_t { + let b: i32x4 = i32x4::new(0, 0, 0, 0); + simd_gt(a, transmute(b)) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s16) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezq_s16(a: int16x8_t) -> uint16x8_t { - let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtz_s64(a: int64x1_t) -> uint64x1_t { + let b: i64x1 = i64x1::new(0); + simd_gt(a, transmute(b)) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s32) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclez_s32(a: int32x2_t) -> uint32x2_t { - let b: i32x2 = i32x2::new(0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t { + let b: i64x2 = i64x2::new(0, 0); + simd_gt(a, transmute(b)) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s32) +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezq_s32(a: int32x4_t) -> uint32x4_t { - let b: i32x4 = i32x4::new(0, 0, 0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtzd_f64(a: f64) -> u64 { + simd_extract!(vcgtz_f64(vdup_n_f64(a)), 0) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s64) +#[doc = "Floating-point compare greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn 
vclez_s64(a: int64x1_t) -> uint64x1_t { - let b: i64x1 = i64x1::new(0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtzs_f32(a: f32) -> u32 { + simd_extract!(vcgtz_f32(vdup_n_f32(a)), 0) } -/// Compare signed less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s64) +#[doc = "Compare signed greater than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmle))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezq_s64(a: int64x2_t) -> uint64x2_t { - let b: i64x2 = i64x2::new(0, 0); - simd_le(a, transmute(b)) +pub unsafe fn vcgtzd_s64(a: i64) -> u64 { + transmute(vcgtz_s64(transmute(a))) } -/// Floating-point compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32) +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmle))] +#[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclez_f32(a: float32x2_t) -> uint32x2_t { - let b: f32x2 = f32x2::new(0.0, 0.0); - simd_le(a, transmute(b)) +pub unsafe fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_le(a, b) } -/// Floating-point compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f32) +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmle))] +#[cfg_attr(test, assert_instr(fcmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezq_f32(a: float32x4_t) -> uint32x4_t { - let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); - simd_le(a, transmute(b)) +pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_le(a, b) } -/// Floating-point compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f64) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmle))] +#[cfg_attr(test, assert_instr(cmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclez_f64(a: float64x1_t) -> uint64x1_t { - let b: f64 = 0.0; - simd_le(a, transmute(b)) +pub unsafe fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_le(a, b) } -/// Floating-point compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f64) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmle))] +#[cfg_attr(test, assert_instr(cmge))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezq_f64(a: float64x2_t) -> uint64x2_t { - let b: f64x2 = f64x2::new(0.0, 0.0); - simd_le(a, transmute(b)) +pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_le(a, b) } -/// Compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_s64) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmp))] +#[cfg_attr(test, assert_instr(cmhs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezd_s64(a: i64) -> u64 { - transmute(vclez_s64(transmute(a))) +pub unsafe fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_le(a, b) +} + +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhs))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_le(a, b) } -/// Floating-point compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezs_f32) +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezs_f32(a: f32) -> u32 { - simd_extract!(vclez_f32(vdup_n_f32(a)), 0) +pub unsafe fn vcled_f64(a: f64, b: f64) -> u64 { + simd_extract!(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } -/// Floating-point compare less than or equal to zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_f64) +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcles_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vclezd_f64(a: f64) -> u64 { - simd_extract!(vclez_f64(vdup_n_f64(a)), 0) +pub unsafe fn vcles_f32(a: f32, b: f32) -> u32 { + simd_extract!(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s8) +#[doc = "Compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
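+// The scalar `d`/`s`-suffixed comparisons below are thin wrappers: they
+// transmute (or duplicate) the scalar into a one-lane vector, reuse the
+// corresponding vector compare, and move the lane mask back out, so the
+// result is always all ones (`u64::MAX` / `u32::MAX`) or all zeros.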
#[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t { - let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn vcled_u64(a: u64, b: u64) -> u64 { + transmute(vcle_u64(transmute(a), transmute(b))) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s8) +#[doc = "Compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t { - let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn vcled_s64(a: i64, b: i64) -> u64 { + transmute(vcle_s64(transmute(a), transmute(b))) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s16) +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(fcmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t { - let b: i16x4 = i16x4::new(0, 0, 0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn vclez_f32(a: float32x2_t) -> uint32x2_t { + let b: f32x2 = f32x2::new(0.0, 0.0); + simd_le(a, transmute(b)) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s16) +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(fcmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t { - let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn vclezq_f32(a: float32x4_t) -> uint32x4_t { + let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); + simd_le(a, transmute(b)) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s32) +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(fcmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t { - let b: i32x2 = i32x2::new(0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn 
vclez_f64(a: float64x1_t) -> uint64x1_t { + let b: f64 = 0.0; + simd_le(a, transmute(b)) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s32) +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(fcmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t { - let b: i32x4 = i32x4::new(0, 0, 0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn vclezq_f64(a: float64x2_t) -> uint64x2_t { + let b: f64x2 = f64x2::new(0.0, 0.0); + simd_le(a, transmute(b)) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s64) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t { - let b: i64x1 = i64x1::new(0); - simd_lt(a, transmute(b)) +pub unsafe fn vclez_s8(a: int8x8_t) -> uint8x8_t { + let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_le(a, transmute(b)) } -/// Compare signed less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s64) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(cmlt))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t { - let b: i64x2 = i64x2::new(0, 0); - simd_lt(a, transmute(b)) +pub unsafe fn vclezq_s8(a: int8x16_t) -> uint8x16_t { + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_le(a, transmute(b)) } -/// Floating-point compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f32) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmlt))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltz_f32(a: float32x2_t) -> uint32x2_t { - let b: f32x2 = f32x2::new(0.0, 0.0); - simd_lt(a, transmute(b)) +pub unsafe fn vclez_s16(a: int16x4_t) -> uint16x4_t { + let b: i16x4 = i16x4::new(0, 0, 0, 0); + simd_le(a, transmute(b)) } -/// Floating-point compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f32) +#[doc = "Compare signed less than or equal to zero"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmlt))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzq_f32(a: float32x4_t) -> uint32x4_t { - let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); - simd_lt(a, transmute(b)) +pub unsafe fn vclezq_s16(a: int16x8_t) -> uint16x8_t { + let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_le(a, transmute(b)) } -/// Floating-point compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f64) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmlt))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltz_f64(a: float64x1_t) -> uint64x1_t { - let b: f64 = 0.0; - simd_lt(a, transmute(b)) +pub unsafe fn vclez_s32(a: int32x2_t) -> uint32x2_t { + let b: i32x2 = i32x2::new(0, 0); + simd_le(a, transmute(b)) } -/// Floating-point compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f64) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmlt))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t { - let b: f64x2 = f64x2::new(0.0, 0.0); - simd_lt(a, transmute(b)) +pub unsafe fn vclezq_s32(a: int32x4_t) -> uint32x4_t { + let b: i32x4 = i32x4::new(0, 0, 0, 0); + simd_le(a, transmute(b)) } -/// Compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_s64) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(asr))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzd_s64(a: i64) -> u64 { - transmute(vcltz_s64(transmute(a))) +pub unsafe fn vclez_s64(a: int64x1_t) -> uint64x1_t { + let b: i64x1 = i64x1::new(0); + simd_le(a, transmute(b)) } -/// Floating-point compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzs_f32) +#[doc = "Compare signed less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcmp))] +#[cfg_attr(test, assert_instr(cmle))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn 
vcltzs_f32(a: f32) -> u32 { - simd_extract!(vcltz_f32(vdup_n_f32(a)), 0) +pub unsafe fn vclezq_s64(a: int64x2_t) -> uint64x2_t { + let b: i64x2 = i64x2::new(0, 0); + simd_le(a, transmute(b)) } -/// Floating-point compare less than zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_f64) +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcltzd_f64(a: f64) -> u64 { - simd_extract!(vcltz_f64(vdup_n_f64(a)), 0) +pub unsafe fn vclezd_f64(a: f64) -> u64 { + simd_extract!(vclez_f64(vdup_n_f64(a)), 0) } -/// Floating-point absolute compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f64) +#[doc = "Floating-point compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64")] - fn vcagt_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - } - vcagt_f64_(a, b) +pub unsafe fn vclezs_f32(a: f32) -> u32 { + simd_extract!(vclez_f32(vdup_n_f32(a)), 0) } -/// Floating-point absolute compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f64) +#[doc = "Compare less than or equal to zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64")] - fn vcagtq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - } - vcagtq_f64_(a, b) +pub unsafe fn vclezd_s64(a: i64) -> u64 { + transmute(vclez_s64(transmute(a))) } -/// Floating-point absolute compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagts_f32) +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(fcmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcagts_f32(a: f32, b: f32) -> u32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - 
#[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facgt.i32.f32")] - fn vcagts_f32_(a: f32, b: f32) -> u32; - } - vcagts_f32_(a, b) +pub unsafe fn vclt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_lt(a, b) } -/// Floating-point absolute compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtd_f64) +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(fcmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcagtd_f64(a: f64, b: f64) -> u64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facgt.i64.f64")] - fn vcagtd_f64_(a: f64, b: f64) -> u64; - } - vcagtd_f64_(a, b) +pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_lt(a, b) } -/// Floating-point absolute compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f64) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facge.v1i64.v1f64")] - fn vcage_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t; - } - vcage_f64_(a, b) +pub unsafe fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_lt(a, b) } -/// Floating-point absolute compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f64) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(cmgt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facge.v2i64.v2f64")] - fn vcageq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t; - } - vcageq_f64_(a, b) +pub unsafe fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_lt(a, b) } -/// Floating-point absolute compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcages_f32) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
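+// AArch64 has no dedicated unsigned less-than instruction, so `vclt_u64` is
+// expected to lower to CMHI (unsigned higher) with the operands swapped;
+// hence the test asserts `cmhi`.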
#[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(cmhi))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcages_f32(a: f32, b: f32) -> u32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facge.i32.f32")] - fn vcages_f32_(a: f32, b: f32) -> u32; - } - vcages_f32_(a, b) +pub unsafe fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_lt(a, b) } -/// Floating-point absolute compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaged_f64) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(cmhi))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcaged_f64(a: f64, b: f64) -> u64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facge.i64.f64")] - fn vcaged_f64_(a: f64, b: f64) -> u64; - } - vcaged_f64_(a, b) +pub unsafe fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_lt(a, b) } -/// Floating-point absolute compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f64) +#[doc = "Compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - vcagt_f64(b, a) +pub unsafe fn vcltd_u64(a: u64, b: u64) -> u64 { + transmute(vclt_u64(transmute(a), transmute(b))) } -/// Floating-point absolute compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f64) +#[doc = "Compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(cmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - vcagtq_f64(b, a) +pub unsafe fn vcltd_s64(a: i64, b: i64) -> u64 { + transmute(vclt_s64(transmute(a), transmute(b))) } -/// Floating-point absolute compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalts_f32) +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcalts_f32(a: f32, 
b: f32) -> u32 { - vcagts_f32(b, a) +pub unsafe fn vclts_f32(a: f32, b: f32) -> u32 { + simd_extract!(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) } -/// Floating-point absolute compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltd_f64) +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facgt))] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcaltd_f64(a: f64, b: f64) -> u64 { - vcagtd_f64(b, a) +pub unsafe fn vcltd_f64(a: f64, b: f64) -> u64 { + simd_extract!(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) } -/// Floating-point absolute compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f64) +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(fcmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { - vcage_f64(b, a) +pub unsafe fn vcltz_f32(a: float32x2_t) -> uint32x2_t { + let b: f32x2 = f32x2::new(0.0, 0.0); + simd_lt(a, transmute(b)) } -/// Floating-point absolute compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f64) +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(fcmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { - vcageq_f64(b, a) +pub unsafe fn vcltzq_f32(a: float32x4_t) -> uint32x4_t { + let b: f32x4 = f32x4::new(0.0, 0.0, 0.0, 0.0); + simd_lt(a, transmute(b)) } -/// Floating-point absolute compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcales_f32) +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(fcmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcales_f32(a: f32, b: f32) -> u32 { - vcages_f32(b, a) +pub unsafe fn vcltz_f64(a: float64x1_t) -> uint64x1_t { + let b: f64 = 0.0; + simd_lt(a, transmute(b)) } -/// Floating-point absolute compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaled_f64) +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(facge))] +#[cfg_attr(test, assert_instr(fcmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcaled_f64(a: f64, b: f64) -> u64 { - vcaged_f64(b, a) +pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t { + let b: f64x2 = f64x2::new(0.0, 0.0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s8) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_lane_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(LANE1, 3); - static_assert_uimm_bits!(LANE2, 3); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t { + let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(LANE1, 4); - static_assert_uimm_bits!(LANE2, 4); - match LANE1 & 0b1111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => 
simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t { + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE1, 2); - static_assert_uimm_bits!(LANE2, 2); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t { + let b: i16x4 = i16x4::new(0, 0, 0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE1, 3); - static_assert_uimm_bits!(LANE2, 3); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as 
u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t { + let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert_uimm_bits!(LANE2, 1); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t { + let b: i32x2 = i32x2::new(0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE1, 2); - static_assert_uimm_bits!(LANE2, 2); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t { + let b: i32x4 = i32x4::new(0, 0, 0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert_uimm_bits!(LANE2, 1); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t { + let b: i64x1 = i64x1::new(0); + simd_lt(a, transmute(b)) } 
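+// A minimal usage sketch for the `vcltz_*` family (illustrative only, not
+// part of the generated output; assumes a NEON-capable aarch64 target, with
+// `vdup_n_s64` and `vget_lane_u64` from the same module):
+//
+// unsafe {
+//     let v: int64x1_t = vdup_n_s64(-5);
+//     let mask: uint64x1_t = vcltz_s64(v);
+//     assert_eq!(vget_lane_u64::<0>(mask), u64::MAX); // -5 < 0, lane is all ones
+// }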
-/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8) +#[doc = "Compare signed less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(cmlt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_lane_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(LANE1, 3); - static_assert_uimm_bits!(LANE2, 3); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t { + let b: i64x2 = i64x2::new(0, 0); + simd_lt(a, transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8) +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(LANE1, 4); - static_assert_uimm_bits!(LANE2, 4); - match LANE1 & 0b1111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle!(a, b, [0, 1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzd_f64(a: f64) -> u64 { + simd_extract!(vcltz_f64(vdup_n_f64(a)), 0) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16) +#[doc = "Floating-point compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(fcmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE1, 2); - static_assert_uimm_bits!(LANE2, 2); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzs_f32(a: f32) -> u32 { + simd_extract!(vcltz_f32(vdup_n_f32(a)), 0) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16) +#[doc = "Compare less than zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] +#[cfg_attr(test, assert_instr(asr))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE1, 3); - static_assert_uimm_bits!(LANE2, 3); - match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), - _ => unreachable_unchecked(), - } +pub unsafe fn vcltzd_s64(a: i64) -> u64 { + transmute(vcltz_s64(transmute(a))) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32) +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert_uimm_bits!(LANE2, 1); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32" + )] + fn _vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; } + _vcmla_f32(a, b, c) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32) +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE1, 2); - static_assert_uimm_bits!(LANE2, 2); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32" + )] + fn _vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; } + _vcmlaq_f32(a, b, c) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64) +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_laneq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert_uimm_bits!(LANE2, 1); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), +#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] 
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64"
+        )]
+        fn _vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
     }
+    _vcmlaq_f64(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmla_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert!(LANE == 0);
+    let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(LANE1, 4);
-    static_assert_uimm_bits!(LANE2, 4);
-    match LANE1 & 0b1111 {
-        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]),
-        8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]),
-        9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]),
-        10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]),
-        11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]),
-        12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]),
-        13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]),
-        14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]),
-        15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmlaq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert!(LANE == 0);
+    let c: float32x4_t = simd_shuffle!(
+        c,
+        c,
+        [
+            2 * LANE as u32,
+            2 * LANE as u32 + 1,
+            2 * LANE as u32,
+            2 * LANE as u32 + 1
+        ]
+    );
+    vcmlaq_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 2);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmla_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmlaq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: float32x4_t = simd_shuffle!(
+        c,
+        c,
+        [
+            2 * LANE as u32,
+            2 * LANE as u32 + 1,
+            2 * LANE as u32,
+            2 * LANE as u32 + 1
+        ]
+    );
+    vcmlaq_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32"
+        )]
+        fn _vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
     }
+    _vcmla_rot180_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32"
+        )]
+        fn _vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
     }
+    _vcmlaq_rot180_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 2);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64"
+        )]
+        fn _vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
     }
+    _vcmlaq_rot180_f64(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmla_rot180_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert!(LANE == 0);
+    let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot180_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x16_t) -> int8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 4);
-    let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmlaq_rot180_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert!(LANE == 0);
+    let c: float32x4_t = simd_shuffle!(
+        c,
+        c,
+        [
+            2 * LANE as u32,
+            2 * LANE as u32 + 1,
+            2 * LANE as u32,
+            2 * LANE as u32 + 1
+        ]
+    );
+    vcmlaq_rot180_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 3);
-    let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmla_rot180_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot180_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmlaq_rot180_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: float32x4_t = simd_shuffle!(
+        c,
+        c,
+        [
+            2 * LANE as u32,
+            2 * LANE as u32 + 1,
+            2 * LANE as u32,
+            2 * LANE as u32 + 1
+        ]
+    );
+    vcmlaq_rot180_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32"
+        )]
+        fn _vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
     }
+    _vcmla_rot270_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 2);
-    let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32"
+        )]
+        fn _vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
     }
+    _vcmlaq_rot270_f32(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x16_t) -> uint8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 4);
-    let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64"
+        )]
+        fn _vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
     }
+    _vcmlaq_rot270_f64(a, b, c)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 3);
-    let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmla_rot270_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert!(LANE == 0);
+    let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot270_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmlaq_rot270_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert!(LANE == 0);
+    let c: float32x4_t = simd_shuffle!(
+        c,
+        c,
+        [
+            2 * LANE as u32,
+            2 * LANE as u32 + 1,
+            2 * LANE as u32,
+            2 * LANE as u32 + 1
+        ]
+    );
+    vcmlaq_rot270_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmla_rot270_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
+    vcmla_rot270_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+pub unsafe fn vcmlaq_rot270_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: float32x4_t = simd_shuffle!(
+        c,
+        c,
+        [
+            2 * LANE as u32,
+            2 * LANE as u32 + 1,
+            2 * LANE as u32,
+            2 * LANE as u32 + 1
+        ]
+    );
+    vcmlaq_rot270_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32"
+        )]
+        fn _vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+    }
+    _vcmla_rot90_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcmla))]
+pub unsafe fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32"
+        )]
+        fn _vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+    }
+    _vcmlaq_rot90_f32(a, b, c)
+}
+
+#[doc = "Floating-point complex multiply accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +#[cfg_attr(test, assert_instr(fcmla))] +pub unsafe fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64" + )] + fn _vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; } + _vcmlaq_rot90_f64(a, b, c) +} + +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +pub unsafe fn vcmla_rot90_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert!(LANE == 0); + let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); + vcmla_rot90_f32(a, b, c) +} + +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +pub unsafe fn vcmlaq_rot90_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert!(LANE == 0); + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot90_f32(a, b, c) +} + +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +pub unsafe fn vcmla_rot90_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); + vcmla_rot90_f32(a, b, c) +} + +#[doc = "Floating-point complex multiply accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,fcma")] +#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +pub unsafe fn vcmlaq_rot90_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: float32x4_t = simd_shuffle!( + c, + c, + [ + 2 * LANE as u32, + 2 * LANE as u32 + 1, + 2 * LANE as u32, + 2 * LANE as u32 + 1 + ] + ); + vcmlaq_rot90_f32(a, b, c) } -/// Insert vector element from another vector element -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { +pub unsafe fn vcopy_lane_f32( + a: float32x2_t, + b: float32x2_t, +) -> float32x2_t { static_assert_uimm_bits!(LANE1, 1); - static_assert_uimm_bits!(LANE2, 2); - let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + static_assert_uimm_bits!(LANE2, 1); match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_laneq_p8(a: poly8x8_t, b: poly8x16_t) -> poly8x8_t { +pub unsafe fn vcopy_lane_s8( + a: int8x8_t, + b: int8x8_t, +) -> int8x8_t { static_assert_uimm_bits!(LANE1, 3); - static_assert_uimm_bits!(LANE2, 4); - let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + static_assert_uimm_bits!(LANE2, 3); match LANE1 & 0b111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_laneq_p16(a: poly16x4_t, b: poly16x8_t) -> poly16x4_t { - static_assert_uimm_bits!(LANE1, 2); +pub unsafe fn vcopyq_lane_s8( + a: int8x16_t, + b: int8x8_t, +) -> int8x16_t { + static_assert_uimm_bits!(LANE1, 4); static_assert_uimm_bits!(LANE2, 3); - let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), + let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b1111 { + 0 => simd_shuffle!( + a, + b, + [ + 16 + LANE2 as u32, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 1 => simd_shuffle!( + a, + b, + [ + 0, + 16 + LANE2 as u32, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 2 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 16 + LANE2 as u32, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 3 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 16 + LANE2 as u32, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 4 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 16 + LANE2 as u32, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 5 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 16 + LANE2 as u32, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 6 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 16 + LANE2 as u32, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 7 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 16 + LANE2 as u32, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 8 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 16 + LANE2 as u32, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 9 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 16 + LANE2 as u32, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 10 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 16 + LANE2 as u32, + 11, + 12, + 13, + 14, + 15 + ] + ), + 11 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 16 + LANE2 as u32, + 12, + 13, + 14, + 15 + ] + ), + 12 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16 + LANE2 as u32, + 13, + 14, + 15 + ] + ), + 13 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 16 + LANE2 as u32, + 14, + 15 + ] + ), + 14 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16 + LANE2 as u32, + 15 + ] + ), + 15 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 16 + LANE2 as u32 + ] + ), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopy_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE1, 1); +pub unsafe fn vcopy_lane_s16( + a: int16x4_t, + b: int16x4_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE1, 2); static_assert_uimm_bits!(LANE2, 2); - let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), - _ => unreachable_unchecked(), - } -} - -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] -#[rustc_legacy_const_generics(1, 3)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_s8(a: int8x16_t, b: int8x8_t) -> int8x16_t { - static_assert_uimm_bits!(LANE1, 4); - static_assert_uimm_bits!(LANE2, 3); - let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b1111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => 
unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { +pub unsafe fn vcopyq_lane_s16( + a: int16x8_t, + b: int16x4_t, +) -> int16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 2); let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); @@ -3015,72 +3857,68 @@ pub unsafe fn vcopyq_lane_s16(a: int16x8_t, } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE1, 2); +pub unsafe fn vcopy_lane_s32( + a: int32x2_t, + b: int32x2_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE1, 1); static_assert_uimm_bits!(LANE2, 1); - let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { - static_assert_uimm_bits!(LANE1, 4); - static_assert_uimm_bits!(LANE2, 3); - let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b1111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => 
simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), +pub unsafe fn vcopyq_lane_s32( + a: int32x4_t, + b: int32x2_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 1); + let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { +pub unsafe fn vcopy_lane_u8( + a: uint8x8_t, + b: uint8x8_t, +) -> uint8x8_t { static_assert_uimm_bits!(LANE1, 3); - static_assert_uimm_bits!(LANE2, 2); - let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + static_assert_uimm_bits!(LANE2, 3); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), @@ -3094,72 +3932,419 @@ pub unsafe fn vcopyq_lane_u16(a: uint16x8_t, } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE1, 2); - static_assert_uimm_bits!(LANE2, 1); - let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); - match LANE1 & 0b11 { - 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), - 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), +pub unsafe fn vcopyq_lane_u8( + a: uint8x16_t, + b: uint8x8_t, +) -> uint8x16_t { + static_assert_uimm_bits!(LANE1, 4); + static_assert_uimm_bits!(LANE2, 3); + let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b1111 { + 0 => simd_shuffle!( + a, + b, + [ + 16 + LANE2 as u32, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 1 => simd_shuffle!( + a, + b, + [ + 0, + 16 + LANE2 as u32, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 2 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 16 + LANE2 as u32, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 3 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 16 + LANE2 as u32, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 4 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 16 + LANE2 as u32, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 5 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 16 + LANE2 as u32, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 6 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 16 + LANE2 as u32, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 7 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 16 + LANE2 as u32, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 8 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 16 + LANE2 as u32, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 9 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 16 + LANE2 as u32, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 10 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 16 + LANE2 as u32, + 11, + 12, + 13, + 14, + 15 + ] + ), + 11 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 16 + LANE2 as u32, + 12, + 13, + 14, + 15 + ] + ), + 12 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16 + LANE2 as u32, + 13, + 14, + 15 + ] + ), + 13 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 16 + LANE2 as u32, + 14, + 15 + ] + ), + 14 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16 + LANE2 as u32, + 15 + ] + ), + 15 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 16 + LANE2 as u32 + ] + ), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_p8(a: poly8x16_t, b: poly8x8_t) -> poly8x16_t { - static_assert_uimm_bits!(LANE1, 4); - static_assert_uimm_bits!(LANE2, 3); - let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); - match LANE1 & 0b1111 { - 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), - 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), - 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), - 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), - 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), - 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), - 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), - 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), - 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), +pub unsafe fn vcopy_lane_u16( + a: uint16x4_t, + b: uint16x4_t, +) -> uint16x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 2); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_p16(a: poly16x8_t, b: poly16x4_t) -> poly16x8_t { +pub unsafe fn vcopyq_lane_u16( + a: uint16x8_t, + b: uint16x4_t, +) -> uint16x8_t { static_assert_uimm_bits!(LANE1, 3); static_assert_uimm_bits!(LANE2, 2); - let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, 
b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), @@ -3173,18 +4358,21 @@ pub unsafe fn vcopyq_lane_p16(a: poly16x8_t, } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_s64(a: int64x2_t, b: int64x1_t) -> int64x2_t { +pub unsafe fn vcopy_lane_u32( + a: uint32x2_t, + b: uint32x2_t, +) -> uint32x2_t { static_assert_uimm_bits!(LANE1, 1); - static_assert!(LANE2 == 0); - let b: int64x2_t = simd_shuffle!(b, b, [0, 1]); + static_assert_uimm_bits!(LANE2, 1); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), @@ -3192,56 +4380,447 @@ pub unsafe fn vcopyq_lane_s64(a: int64x2_t, } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_u64(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert!(LANE2 == 0); - let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), - _ => unreachable_unchecked(), +pub unsafe fn vcopyq_lane_u32( + a: uint32x4_t, + b: uint32x2_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 1); + let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = 
"1.59.0")] -pub unsafe fn vcopyq_lane_p64(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert!(LANE2 == 0); - let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), +pub unsafe fn vcopy_lane_p8( + a: poly8x8_t, + b: poly8x8_t, +) -> poly8x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 3); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcopyq_lane_p8( + a: poly8x16_t, + b: poly8x8_t, +) -> poly8x16_t { + static_assert_uimm_bits!(LANE1, 4); + static_assert_uimm_bits!(LANE2, 3); + let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b1111 { + 0 => simd_shuffle!( + a, + b, + [ + 16 + LANE2 as u32, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 1 => simd_shuffle!( + a, + b, + [ + 0, + 16 + LANE2 as u32, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 2 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 16 + LANE2 as u32, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 3 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 16 + LANE2 as u32, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 4 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 16 + LANE2 as u32, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 5 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 16 + LANE2 as u32, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 6 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 16 + LANE2 as u32, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 7 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 16 + LANE2 as u32, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 8 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 16 + LANE2 as u32, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 9 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 16 + LANE2 as u32, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 10 => simd_shuffle!( + a, + b, + [ + 0, 
+ 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 16 + LANE2 as u32, + 11, + 12, + 13, + 14, + 15 + ] + ), + 11 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 16 + LANE2 as u32, + 12, + 13, + 14, + 15 + ] + ), + 12 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16 + LANE2 as u32, + 13, + 14, + 15 + ] + ), + 13 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 16 + LANE2 as u32, + 14, + 15 + ] + ), + 14 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16 + LANE2 as u32, + 15 + ] + ), + 15 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 16 + LANE2 as u32 + ] + ), + _ => unreachable_unchecked(), + } +} + +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { +pub unsafe fn vcopy_lane_p16( + a: poly16x4_t, + b: poly16x4_t, +) -> poly16x4_t { static_assert_uimm_bits!(LANE1, 2); - static_assert_uimm_bits!(LANE2, 1); - let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); + static_assert_uimm_bits!(LANE2, 2); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), @@ -3251,25410 +4830,17501 @@ pub unsafe fn vcopyq_lane_f32(a: float32x4_t } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] #[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcopyq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { - static_assert_uimm_bits!(LANE1, 1); - static_assert!(LANE2 == 0); - let b: float64x2_t = simd_shuffle!(b, b, [0, 1]); - match LANE1 & 0b1 { - 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), - 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), +pub unsafe fn vcopyq_lane_p16( + a: poly16x8_t, + b: poly16x4_t, +) -> poly16x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 2); + let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 
1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcreate_f64(a: u64) -> float64x1_t { - transmute(a) +pub unsafe fn vcopy_laneq_f32( + a: float32x2_t, + b: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 2); + let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_f64_s64(a: int64x1_t) -> float64x1_t { - simd_cast(a) +pub unsafe fn vcopyq_laneq_f32( + a: float32x4_t, + b: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 2); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_f64_s64(a: int64x2_t) -> float64x2_t { - simd_cast(a) +pub unsafe fn vcopyq_laneq_f64( + a: float64x2_t, + b: float64x2_t, +) -> float64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 1); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Fixed-point convert to floating-point -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t { - simd_cast(a) +pub unsafe fn vcopy_laneq_s8( + a: int8x8_t, + b: int8x16_t, +) -> int8x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 4); + let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t { - simd_cast(a) +pub unsafe fn vcopyq_laneq_s8( + a: int8x16_t, + b: int8x16_t, +) -> int8x16_t { + static_assert_uimm_bits!(LANE1, 4); + static_assert_uimm_bits!(LANE2, 4); + match LANE1 & 0b1111 { + 0 => simd_shuffle!( + a, + b, + [ + 16 + LANE2 as u32, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 1 => simd_shuffle!( + a, + b, + [ + 0, + 16 + LANE2 as u32, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 2 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 16 + LANE2 as u32, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 3 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 16 + LANE2 as u32, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 4 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 16 + LANE2 as u32, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 5 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 16 + LANE2 as u32, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 6 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 16 + LANE2 as u32, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 7 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 16 + LANE2 as u32, + 8, + 9, + 10, 
+ 11, + 12, + 13, + 14, + 15 + ] + ), + 8 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 16 + LANE2 as u32, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 9 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 16 + LANE2 as u32, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 10 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 16 + LANE2 as u32, + 11, + 12, + 13, + 14, + 15 + ] + ), + 11 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 16 + LANE2 as u32, + 12, + 13, + 14, + 15 + ] + ), + 12 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16 + LANE2 as u32, + 13, + 14, + 15 + ] + ), + 13 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 16 + LANE2 as u32, + 14, + 15 + ] + ), + 14 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16 + LANE2 as u32, + 15 + ] + ), + 15 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 16 + LANE2 as u32 + ] + ), + _ => unreachable_unchecked(), + } } -/// Floating-point convert to higher precision long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtl))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t { - simd_cast(a) +pub unsafe fn vcopy_laneq_s16( + a: int16x4_t, + b: int16x8_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 3); + let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Floating-point convert to higher precision long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtl))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t { - let b: float32x2_t = simd_shuffle!(a, a, [2, 3]); - simd_cast(b) +pub unsafe fn vcopyq_laneq_s16( + a: int16x8_t, + b: int16x8_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 3); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 8 
+ LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Floating-point convert to lower precision narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtn))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t { - simd_cast(a) +pub unsafe fn vcopy_laneq_s32( + a: int32x2_t, + b: int32x4_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 2); + let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Floating-point convert to lower precision narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtn))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { - simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3]) +pub unsafe fn vcopyq_laneq_s32( + a: int32x4_t, + b: int32x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 2); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Floating-point convert to lower precision narrow, rounding to odd -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtxn))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { - 
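// A minimal sketch of the round-to-odd narrowing the vcvtx family performs:
// when the f64 -> f32 conversion is inexact, FCVTXN forces the result's
// least-significant mantissa bit to 1, so a later f32 -> f16 rounding step
// cannot double-round. The helper name and its use here are illustrative.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn narrow_round_to_odd(x: f64) -> f32 {
    use core::arch::aarch64::vcvtxd_f32_f64;
    vcvtxd_f32_f64(x)
}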
#[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64")] - fn vcvtx_f32_f64_(a: float64x2_t) -> float32x2_t; +pub unsafe fn vcopyq_laneq_s64( + a: int64x2_t, + b: int64x2_t, +) -> int64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 1); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtx_f32_f64_(a) } -/// Floating-point convert to lower precision narrow, rounding to odd -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtxn))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 { - simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) +pub unsafe fn vcopy_laneq_u8( + a: uint8x8_t, + b: uint8x16_t, +) -> uint8x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 4); + let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), + } } -/// Floating-point convert to lower precision narrow, rounding to odd -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtxn))] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { - simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) +pub unsafe fn vcopyq_laneq_u8( + a: uint8x16_t, + b: uint8x16_t, +) -> uint8x16_t { + static_assert_uimm_bits!(LANE1, 4); + static_assert_uimm_bits!(LANE2, 4); + match LANE1 & 0b1111 { + 0 => simd_shuffle!( + a, + b, + [ + 16 + LANE2 as u32, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 1 => simd_shuffle!( + a, + b, + [ + 0, + 16 + LANE2 as u32, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 2 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 16 + LANE2 as 
u32, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 3 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 16 + LANE2 as u32, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 4 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 16 + LANE2 as u32, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 5 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 16 + LANE2 as u32, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 6 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 16 + LANE2 as u32, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 7 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 16 + LANE2 as u32, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 8 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 16 + LANE2 as u32, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 9 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 16 + LANE2 as u32, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 10 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 16 + LANE2 as u32, + 11, + 12, + 13, + 14, + 15 + ] + ), + 11 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 16 + LANE2 as u32, + 12, + 13, + 14, + 15 + ] + ), + 12 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16 + LANE2 as u32, + 13, + 14, + 15 + ] + ), + 13 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 16 + LANE2 as u32, + 14, + 15 + ] + ), + 14 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16 + LANE2 as u32, + 15 + ] + ), + 15 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 16 + LANE2 as u32 + ] + ), + _ => unreachable_unchecked(), + } } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_n_f64_s64(a: int64x1_t) -> float64x1_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64")] - fn vcvt_n_f64_s64_(a: int64x1_t, n: i32) -> float64x1_t; +pub unsafe fn vcopy_laneq_u16( + a: uint16x4_t, + b: uint16x8_t, +) -> uint16x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 3); + let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), } - 
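// A minimal sketch (illustrative values) of the lane-copy pattern above:
// simd_shuffle! indexes the concatenation of its two operands, so once `a`
// has been widened to eight lanes, index `8 + LANE2` selects lane LANE2 of
// `b`, while the remaining indices keep the original lanes of `a`.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn copy_lane_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_u16(1); // a = [1, 1, 1, 1]
    let b = vdupq_n_u16(7); // b = [7, 7, 7, 7, 7, 7, 7, 7]
    let r = vcopy_laneq_u16::<2, 5>(a, b); // r = [1, 1, 7, 1]
    assert_eq!(vget_lane_u16::<2>(r), 7);
}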
vcvt_n_f64_s64_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_f64_s64(a: int64x2_t) -> float64x2_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64")] - fn vcvtq_n_f64_s64_(a: int64x2_t, n: i32) -> float64x2_t; +pub unsafe fn vcopyq_laneq_u16( + a: uint16x8_t, + b: uint16x8_t, +) -> uint16x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 3); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtq_n_f64_s64_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_s32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_n_f32_s32(a: i32) -> f32 { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxs2fp.f32.i32")] - fn vcvts_n_f32_s32_(a: i32, n: i32) -> f32; +pub unsafe fn vcopy_laneq_u32( + a: uint32x2_t, + b: uint32x4_t, +) -> uint32x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 2); + let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvts_n_f32_s32_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_n_f64_s64(a: i64) -> f64 { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxs2fp.f64.i64")] - fn vcvtd_n_f64_s64_(a: i64, n: i32) -> f64; +pub unsafe fn vcopyq_laneq_u32( + a: uint32x4_t, + b: uint32x4_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 2); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtd_n_f64_s64_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_n_f64_u64(a: uint64x1_t) -> float64x1_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64")] - fn vcvt_n_f64_u64_(a: uint64x1_t, n: i32) -> float64x1_t; +pub unsafe fn vcopyq_laneq_u64( + a: uint64x2_t, + b: uint64x2_t, +) -> uint64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 1); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvt_n_f64_u64_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_f64_u64(a: uint64x2_t) -> float64x2_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64")] - fn 
vcvtq_n_f64_u64_(a: uint64x2_t, n: i32) -> float64x2_t; +pub unsafe fn vcopy_laneq_p8( + a: poly8x8_t, + b: poly8x16_t, +) -> poly8x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 4); + let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtq_n_f64_u64_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_u32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_n_f32_u32(a: u32) -> f32 { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxu2fp.f32.i32")] - fn vcvts_n_f32_u32_(a: u32, n: i32) -> f32; +pub unsafe fn vcopyq_laneq_p8( + a: poly8x16_t, + b: poly8x16_t, +) -> poly8x16_t { + static_assert_uimm_bits!(LANE1, 4); + static_assert_uimm_bits!(LANE2, 4); + match LANE1 & 0b1111 { + 0 => simd_shuffle!( + a, + b, + [ + 16 + LANE2 as u32, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 1 => simd_shuffle!( + a, + b, + [ + 0, + 16 + LANE2 as u32, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 2 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 16 + LANE2 as u32, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 3 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 16 + LANE2 as u32, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 4 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 16 + LANE2 as u32, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 5 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 16 + LANE2 as u32, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 6 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 16 + LANE2 as u32, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 7 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 16 + LANE2 as u32, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 8 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 16 + LANE2 as u32, + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ] + ), + 9 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 16 + LANE2 as u32, + 10, + 
11, + 12, + 13, + 14, + 15 + ] + ), + 10 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 16 + LANE2 as u32, + 11, + 12, + 13, + 14, + 15 + ] + ), + 11 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 16 + LANE2 as u32, + 12, + 13, + 14, + 15 + ] + ), + 12 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 16 + LANE2 as u32, + 13, + 14, + 15 + ] + ), + 13 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 16 + LANE2 as u32, + 14, + 15 + ] + ), + 14 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16 + LANE2 as u32, + 15 + ] + ), + 15 => simd_shuffle!( + a, + b, + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 16 + LANE2 as u32 + ] + ), + _ => unreachable_unchecked(), } - vcvts_n_f32_u32_(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_n_f64_u64(a: u64) -> f64 { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxu2fp.f64.i64")] - fn vcvtd_n_f64_u64_(a: u64, n: i32) -> f64; +pub unsafe fn vcopy_laneq_p16( + a: poly16x4_t, + b: poly16x8_t, +) -> poly16x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 3); + let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtd_n_f64_u64_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s64_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_n_s64_f64(a: float64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64")] - fn vcvt_n_s64_f64_(a: float64x1_t, n: i32) -> int64x1_t; +pub unsafe 
fn vcopyq_laneq_p16( + a: poly16x8_t, + b: poly16x8_t, +) -> poly16x8_t { + static_assert_uimm_bits!(LANE1, 3); + static_assert_uimm_bits!(LANE2, 3); + match LANE1 & 0b111 { + 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), + 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), + 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), + 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), + 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), + 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvt_n_s64_f64_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s64_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_s64_f64(a: float64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64")] - fn vcvtq_n_s64_f64_(a: float64x2_t, n: i32) -> int64x2_t; +pub unsafe fn vcopyq_laneq_p64( + a: poly64x2_t, + b: poly64x2_t, +) -> poly64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert_uimm_bits!(LANE2, 1); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtq_n_s64_f64_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_s32_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_n_s32_f32(a: f32) -> i32 { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f32")] - fn vcvts_n_s32_f32_(a: f32, n: i32) -> i32; +pub unsafe fn vcopyq_lane_f32( + a: float32x4_t, + b: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE1, 2); + static_assert_uimm_bits!(LANE2, 1); + let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); + match LANE1 & 0b11 { + 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), + 2 => 
simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), + 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvts_n_s32_f32_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_s64_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_n_s64_f64(a: f64) -> i64 { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f64")] - fn vcvtd_n_s64_f64_(a: f64, n: i32) -> i64; +pub unsafe fn vcopyq_lane_f64( + a: float64x2_t, + b: float64x1_t, +) -> float64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert!(LANE2 == 0); + let b: float64x2_t = simd_shuffle!(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtd_n_s64_f64_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u64_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_n_u64_f64(a: float64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64")] - fn vcvt_n_u64_f64_(a: float64x1_t, n: i32) -> uint64x1_t; +pub unsafe fn vcopyq_lane_s64( + a: int64x2_t, + b: int64x1_t, +) -> int64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert!(LANE2 == 0); + let b: int64x2_t = simd_shuffle!(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvt_n_u64_f64_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u64_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] -#[rustc_legacy_const_generics(1)] 
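// A minimal sketch (illustrative values): the `_lane_` variants with a
// 64-bit source element read from a single-lane vector, so
// `static_assert!(LANE2 == 0)` pins the source lane and only LANE1 chooses
// the destination lane.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn copy_d_lane_demo() {
    use core::arch::aarch64::*;
    let a = vdupq_n_s64(-1); // a = [-1, -1]
    let b = vdup_n_s64(42); // b = [42]
    let r = vcopyq_lane_s64::<1, 0>(a, b); // r = [-1, 42]
    assert_eq!(vgetq_lane_s64::<1>(r), 42);
}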
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_u64_f64(a: float64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64")] - fn vcvtq_n_u64_f64_(a: float64x2_t, n: i32) -> uint64x2_t; +pub unsafe fn vcopyq_lane_u64( + a: uint64x2_t, + b: uint64x1_t, +) -> uint64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert!(LANE2 == 0); + let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvtq_n_u64_f64_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_u32_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))] +#[rustc_legacy_const_generics(1, 3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_n_u32_f32(a: f32) -> u32 { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f32")] - fn vcvts_n_u32_f32_(a: f32, n: i32) -> u32; +pub unsafe fn vcopyq_lane_p64( + a: poly64x2_t, + b: poly64x1_t, +) -> poly64x2_t { + static_assert_uimm_bits!(LANE1, 1); + static_assert!(LANE2 == 0); + let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]); + match LANE1 & 0b1 { + 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), + 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), + _ => unreachable_unchecked(), } - vcvts_n_u32_f32_(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_u64_f64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_n_u64_f64(a: f64) -> u64 { - static_assert!(N >= 1 && N <= 64); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f64")] - fn vcvtd_n_u64_f64_(a: f64, n: i32) -> u64; - } - vcvtd_n_u64_f64_(a, N) +pub unsafe fn vcreate_f64(a: u64) -> float64x1_t { + transmute(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_s32) +#[doc = "Floating-point convert to lower precision narrow"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf))] +#[cfg_attr(test, assert_instr(fcvtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_f32_s32(a: i32) -> f32 { - a as f32 +pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t { + simd_cast(a) +} + +#[doc = "Floating-point convert to higher precision long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtl))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t { + simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_s64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(scvtf))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 { - a as f64 +pub unsafe fn vcvt_f64_s64(a: int64x1_t) -> float64x1_t { + simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf))] +#[cfg_attr(test, assert_instr(scvtf))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_f32_u32(a: u32) -> f32 { - a as f32 +pub unsafe fn vcvtq_f64_s64(a: int64x2_t) -> float64x2_t { + simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_u64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(ucvtf))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_f64_u64(a: u64) -> f64 { - a as f64 +pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t { + simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_s32_f32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs))] +#[cfg_attr(test, assert_instr(ucvtf))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn 
vcvts_s32_f32(a: f32) -> i32 { - a as i32 +pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t { + simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_s64_f64) +#[doc = "Floating-point convert to lower precision narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs))] +#[cfg_attr(test, assert_instr(fcvtn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_s64_f64(a: f64) -> i64 { - a as i64 +pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { + simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3]) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_u32_f32) +#[doc = "Floating-point convert to higher precision long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu))] +#[cfg_attr(test, assert_instr(fcvtl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvts_u32_f32(a: f32) -> u32 { - a as u32 +pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t { + let b: float32x2_t = simd_shuffle!(a, a, [2, 3]); + simd_cast(b) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_u64_f64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu))] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 { - a as u64 +pub unsafe fn vcvt_n_f64_s64(a: int64x1_t) -> float64x1_t { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64" + )] + fn _vcvt_n_f64_s64(a: int64x1_t, n: i32) -> float64x1_t; + } + _vcvt_n_f64_s64(a, N) +} + +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_f64_s64(a: int64x2_t) -> float64x2_t { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64" + )] + fn _vcvtq_n_f64_s64(a: int64x2_t, n: i32) -> float64x2_t; + } + _vcvtq_n_f64_s64(a, N) +} + +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's 
-/// Fixed-point convert to floating-point
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_u64_f64)
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
-    a as u64
+pub unsafe fn vcvt_n_f64_s64<const N: i32>(a: int64x1_t) -> float64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64"
+        )]
+        fn _vcvt_n_f64_s64(a: int64x1_t, n: i32) -> float64x1_t;
+    }
+    _vcvt_n_f64_s64(a, N)
+}
+
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f64_s64<const N: i32>(a: int64x2_t) -> float64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64"
+        )]
+        fn _vcvtq_n_f64_s64(a: int64x2_t, n: i32) -> float64x2_t;
+    }
+    _vcvtq_n_f64_s64(a, N)
+}
+
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f64_u64<const N: i32>(a: uint64x1_t) -> float64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64"
+        )]
+        fn _vcvt_n_f64_u64(a: int64x1_t, n: i32) -> float64x1_t;
+    }
+    _vcvt_n_f64_u64(a.as_signed(), N)
+}
+
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f64_u64<const N: i32>(a: uint64x2_t) -> float64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64"
+        )]
+        fn _vcvtq_n_f64_u64(a: int64x2_t, n: i32) -> float64x2_t;
+    }
+    _vcvtq_n_f64_u64(a.as_signed(), N)
+}
+
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_s64_f64<const N: i32>(a: float64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64"
+        )]
+        fn _vcvt_n_s64_f64(a: float64x1_t, n: i32) -> int64x1_t;
+    }
+    _vcvt_n_s64_f64(a, N)
+}
+
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_s64_f64<const N: i32>(a: float64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64"
+        )]
+        fn _vcvtq_n_s64_f64(a: float64x2_t, n: i32) -> int64x2_t;
+    }
+    _vcvtq_n_s64_f64(a, N)
+}
+
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_u64_f64<const N: i32>(a: float64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64"
+        )]
+        fn _vcvt_n_u64_f64(a: float64x1_t, n: i32) -> int64x1_t;
+    }
+    _vcvt_n_u64_f64(a, N).as_unsigned()
+}
+
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_u64_f64<const N: i32>(a: float64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64"
+        )]
+        fn _vcvtq_n_u64_f64(a: float64x2_t, n: i32) -> int64x2_t;
+    }
+    _vcvtq_n_u64_f64(a, N).as_unsigned()
 }
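The `vcvt_n_*` intrinsics treat the integer side as a fixed-point value with `N` fractional bits, which is why `N` is a const generic bounded by `static_assert!(N >= 1 && N <= 64)`. A hedged round-trip sketch (wrapper name and constants are illustrative):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn demo_fixed_point() {
    use core::arch::aarch64::*;
    // Interpret 96 as a fixed-point value with 5 fractional bits: 96 / 2^5 = 3.0
    let fixed: int64x1_t = vdup_n_s64(96);
    let as_float: float64x1_t = vcvt_n_f64_s64::<5>(fixed);
    assert_eq!(vget_lane_f64::<0>(as_float), 3.0);
    // And back, rounding toward zero: 3.0 * 2^5 = 96
    let back: int64x1_t = vcvt_n_s64_f64::<5>(as_float);
    assert_eq!(vget_lane_s64::<0>(back), 96);
}
```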
-/// Floating-point convert to signed fixed-point, rounding toward zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s64_f64)
+#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptosi.sat.v1i64.v1f64")]
-        fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptosi.sat.v1i64.v1f64"
+        )]
+        fn _vcvt_s64_f64(a: float64x1_t) -> int64x1_t;
     }
-    vcvt_s64_f64_(a)
+    _vcvt_s64_f64(a)
 }

-/// Floating-point convert to signed fixed-point, rounding toward zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s64_f64)
+#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptosi.sat.v2i64.v2f64")]
-        fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptosi.sat.v2i64.v2f64"
+        )]
+        fn _vcvtq_s64_f64(a: float64x2_t) -> int64x2_t;
     }
-    vcvtq_s64_f64_(a)
+    _vcvtq_s64_f64(a)
 }

-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u64_f64)
+#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptoui.sat.v1i64.v1f64")]
-        fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptoui.sat.v1i64.v1f64"
+        )]
+        fn _vcvt_u64_f64(a: float64x1_t) -> int64x1_t;
     }
-    vcvt_u64_f64_(a)
+    _vcvt_u64_f64(a).as_unsigned()
 }

-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u64_f64)
+#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptoui.sat.v2i64.v2f64")]
-        fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptoui.sat.v2i64.v2f64"
+        )]
+        fn _vcvtq_u64_f64(a: float64x2_t) -> int64x2_t;
     }
-    vcvtq_u64_f64_(a)
+    _vcvtq_u64_f64(a).as_unsigned()
 }
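Because these lower to LLVM's saturating conversions (`llvm.fptosi.sat`/`llvm.fptoui.sat`), out-of-range inputs clamp to the type bounds rather than producing unspecified values. A short sketch under that assumption (values illustrative):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn demo_saturating() {
    use core::arch::aarch64::*;
    // Far too large for i64: the result saturates to i64::MAX
    let big: float64x1_t = vdup_n_f64(1e300);
    assert_eq!(vget_lane_s64::<0>(vcvt_s64_f64(big)), i64::MAX);
    // Negative input to the unsigned convert saturates to 0
    let neg: float64x1_t = vdup_n_f64(-1.0);
    assert_eq!(vget_lane_u64::<0>(vcvt_u64_f64(neg)), 0);
}
```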
-/// Floating-point convert to signed integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s32_f32)
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32")]
-        fn vcvta_s32_f32_(a: float32x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32"
+        )]
+        fn _vcvta_s32_f32(a: float32x2_t) -> int32x2_t;
     }
-    vcvta_s32_f32_(a)
+    _vcvta_s32_f32(a)
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s32_f32)
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32")]
-        fn vcvtaq_s32_f32_(a: float32x4_t) -> int32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32"
+        )]
+        fn _vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t;
     }
-    vcvtaq_s32_f32_(a)
+    _vcvtaq_s32_f32(a)
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s64_f64)
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64")]
-        fn vcvta_s64_f64_(a: float64x1_t) -> int64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64"
+        )]
+        fn _vcvta_s64_f64(a: float64x1_t) -> int64x1_t;
     }
-    vcvta_s64_f64_(a)
+    _vcvta_s64_f64(a)
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s64_f64)
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64")]
-        fn vcvtaq_s64_f64_(a: float64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64"
+        )]
+        fn _vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t;
     }
-    vcvtaq_s64_f64_(a)
+    _vcvtaq_s64_f64(a)
 }

-/// Floating-point convert to integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_s32_f32)
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
+#[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtas.i32.f32")]
-        fn vcvtas_s32_f32_(a: f32) -> i32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32"
+        )]
+        fn _vcvta_u32_f32(a: float32x2_t) -> int32x2_t;
     }
-    vcvtas_s32_f32_(a)
+    _vcvta_u32_f32(a).as_unsigned()
 }

-/// Floating-point convert to integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_s64_f64)
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
+#[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtas.i64.f64")]
-        fn vcvtad_s64_f64_(a: f64) -> i64;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32"
+        )]
+        fn _vcvtaq_u32_f32(a: float32x4_t) -> int32x4_t;
     }
-    vcvtad_s64_f64_(a)
+    _vcvtaq_u32_f32(a).as_unsigned()
 }

-/// Floating-point convert to integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_u32_f32)
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtau.i32.f32")]
-        fn vcvtas_u32_f32_(a: f32) -> u32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64"
+        )]
+        fn _vcvta_u64_f64(a: float64x1_t) -> int64x1_t;
     }
-    vcvtas_u32_f32_(a)
+    _vcvta_u64_f64(a).as_unsigned()
 }

-/// Floating-point convert to integer, rounding to nearest with ties to away
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_u64_f64)
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtau.i64.f64")]
-        fn vcvtad_u64_f64_(a: f64) -> u64;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64"
+        )]
+        fn _vcvtaq_u64_f64(a: float64x2_t) -> int64x2_t;
     }
-    vcvtad_u64_f64_(a)
+    _vcvtaq_u64_f64(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to even
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s32_f32)
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32")]
-        fn vcvtn_s32_f32_(a: float32x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i32.f32"
+        )]
+        fn _vcvtas_s32_f32(a: f32) -> i32;
     }
-    vcvtn_s32_f32_(a)
+    _vcvtas_s32_f32(a)
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to even
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s32_f32)
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 {
    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32")]
-        fn vcvtnq_s32_f32_(a: float32x4_t) -> int32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i64.f64"
+        )]
+        fn _vcvtad_s64_f64(a: f64) -> i64;
     }
-    vcvtnq_s32_f32_(a)
+    _vcvtad_s64_f64(a)
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to even
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s64_f64)
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64")]
-        fn vcvtn_s64_f64_(a: float64x1_t) -> int64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i32.f32"
+        )]
+        fn _vcvtas_u32_f32(a: f32) -> i32;
     }
-    vcvtn_s64_f64_(a)
+    _vcvtas_u32_f32(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to even
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s64_f64)
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64")]
-        fn vcvtnq_s64_f64_(a: float64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i64.f64"
+        )]
+        fn _vcvtad_u64_f64(a: f64) -> i64;
     }
-    vcvtnq_s64_f64_(a)
+    _vcvtad_u64_f64(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to even
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_s32_f32)
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(scvtf))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtns.i32.f32")]
-        fn vcvtns_s32_f32_(a: f32) -> i32;
-    }
-    vcvtns_s32_f32_(a)
+pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 {
+    a as f64
 }

-/// Floating-point convert to signed integer, rounding to nearest with ties to even
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_s64_f64)
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(scvtf))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtns.i64.f64")]
-        fn vcvtnd_s64_f64_(a: f64) -> i64;
-    }
-    vcvtnd_s64_f64_(a)
+pub unsafe fn vcvts_f32_s32(a: i32) -> f32 {
+    a as f32
 }
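The `vcvta*` family rounds to the nearest integer with ties away from zero (`fcvtas`/`fcvtau`), which differs both from Rust's `as` casts (truncation toward zero) and from the ties-to-even `vcvtn*` family further below. A small sketch using the scalar forms (values illustrative):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn demo_ties_away() {
    use core::arch::aarch64::*;
    assert_eq!(vcvtas_s32_f32(2.5), 3);   // ties go away from zero
    assert_eq!(vcvtas_s32_f32(-2.5), -3);
    assert_eq!(2.5f32 as i32, 2);         // `as` truncates instead
    assert_eq!(vcvtad_u64_f64(2.5), 3);   // unsigned variant on f64
}
```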
-/// Floating-point convert to signed integer, rounding toward minus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32")]
-        fn vcvtm_s32_f32_(a: float32x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32"
+        )]
+        fn _vcvtm_s32_f32(a: float32x2_t) -> int32x2_t;
     }
-    vcvtm_s32_f32_(a)
+    _vcvtm_s32_f32(a)
 }

-/// Floating-point convert to signed integer, rounding toward minus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s32_f32)
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32")]
-        fn vcvtmq_s32_f32_(a: float32x4_t) -> int32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32"
+        )]
+        fn _vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t;
     }
-    vcvtmq_s32_f32_(a)
+    _vcvtmq_s32_f32(a)
 }

-/// Floating-point convert to signed integer, rounding toward minus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s64_f64)
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64")]
-        fn vcvtm_s64_f64_(a: float64x1_t) -> int64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64"
+        )]
+        fn _vcvtm_s64_f64(a: float64x1_t) -> int64x1_t;
     }
-    vcvtm_s64_f64_(a)
+    _vcvtm_s64_f64(a)
 }

-/// Floating-point convert to signed integer, rounding toward minus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s64_f64)
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64")]
-        fn vcvtmq_s64_f64_(a: float64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64"
+        )]
+        fn _vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t;
     }
-    vcvtmq_s64_f64_(a)
+    _vcvtmq_s64_f64(a)
 }

-/// Floating-point convert to signed integer, rounding toward minus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_s32_f32)
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtms.i32.f32")]
-        fn vcvtms_s32_f32_(a: f32) -> i32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32"
+        )]
+        fn _vcvtm_u32_f32(a: float32x2_t) -> int32x2_t;
     }
-    vcvtms_s32_f32_(a)
+    _vcvtm_u32_f32(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding toward minus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_s64_f64)
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtms.i64.f64")]
-        fn vcvtmd_s64_f64_(a: f64) -> i64;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32"
+        )]
+        fn _vcvtmq_u32_f32(a: float32x4_t) -> int32x4_t;
     }
-    vcvtmd_s64_f64_(a)
+    _vcvtmq_u32_f32(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding toward plus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s32_f32)
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32")]
-        fn vcvtp_s32_f32_(a: float32x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64"
+        )]
+        fn _vcvtm_u64_f64(a: float64x1_t) -> int64x1_t;
     }
-    vcvtp_s32_f32_(a)
+    _vcvtm_u64_f64(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding toward plus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s32_f32)
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32")]
-        fn vcvtpq_s32_f32_(a: float32x4_t) -> int32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64"
+        )]
+        fn _vcvtmq_u64_f64(a: float64x2_t) -> int64x2_t;
     }
-    vcvtpq_s32_f32_(a)
+    _vcvtmq_u64_f64(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding toward plus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s64_f64)
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64")]
-        fn vcvtp_s64_f64_(a: float64x1_t) -> int64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i32.f32"
+        )]
+        fn _vcvtms_s32_f32(a: f32) -> i32;
     }
-    vcvtp_s64_f64_(a)
+    _vcvtms_s32_f32(a)
 }

-/// Floating-point convert to signed integer, rounding toward plus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s64_f64)
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64")]
-        fn vcvtpq_s64_f64_(a: float64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i64.f64"
+        )]
+        fn _vcvtmd_s64_f64(a: f64) -> i64;
     }
-    vcvtpq_s64_f64_(a)
+    _vcvtmd_s64_f64(a)
 }

-/// Floating-point convert to signed integer, rounding toward plus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_s32_f32)
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtps.i32.f32")]
-        fn vcvtps_s32_f32_(a: f32) -> i32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i32.f32"
+        )]
+        fn _vcvtms_u32_f32(a: f32) -> i32;
     }
-    vcvtps_s32_f32_(a)
+    _vcvtms_u32_f32(a).as_unsigned()
 }

-/// Floating-point convert to signed integer, rounding toward plus infinity
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_s64_f64)
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtps.i64.f64")]
-        fn vcvtpd_s64_f64_(a: f64) -> i64;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i64.f64"
+        )]
+        fn _vcvtmd_u64_f64(a: f64) -> i64;
     }
-    vcvtpd_s64_f64_(a)
+    _vcvtmd_u64_f64(a).as_unsigned()
 }
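`vcvtm*` is a floor applied before conversion; for the unsigned forms a negative floor result then clamps at zero. Illustrative sketch:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn demo_floor() {
    use core::arch::aarch64::*;
    let v: float32x2_t = vdup_n_f32(-0.5);
    // floor(-0.5) = -1 for the signed convert...
    assert_eq!(vget_lane_s32::<0>(vcvtm_s32_f32(v)), -1);
    // ...but the unsigned convert clamps the negative result to 0
    assert_eq!(vget_lane_u32::<0>(vcvtm_u32_f32(v)), 0);
}
```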
ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtau))] +#[cfg_attr(test, assert_instr(fcvtns))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32")] - fn vcvta_u32_f32_(a: float32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32" + )] + fn _vcvtn_s32_f32(a: float32x2_t) -> int32x2_t; } - vcvta_u32_f32_(a) + _vcvtn_s32_f32(a) } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to away -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u32_f32) +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtau))] +#[cfg_attr(test, assert_instr(fcvtns))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32")] - fn vcvtaq_u32_f32_(a: float32x4_t) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32" + )] + fn _vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t; } - vcvtaq_u32_f32_(a) + _vcvtnq_s32_f32(a) } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to away -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u64_f64) +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtau))] +#[cfg_attr(test, assert_instr(fcvtns))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64")] - fn vcvta_u64_f64_(a: float64x1_t) -> uint64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64" + )] + fn _vcvtn_s64_f64(a: float64x1_t) -> int64x1_t; } - vcvta_u64_f64_(a) + _vcvtn_s64_f64(a) } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to away -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u64_f64) +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtau))] +#[cfg_attr(test, assert_instr(fcvtns))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64")] - fn vcvtaq_u64_f64_(a: float64x2_t) -> uint64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64" + )] + fn _vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t; } - vcvtaq_u64_f64_(a) + _vcvtnq_s64_f64(a) } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u32_f32) +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtnu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32")] - fn vcvtn_u32_f32_(a: float32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32" + )] + fn _vcvtn_u32_f32(a: float32x2_t) -> int32x2_t; } - vcvtn_u32_f32_(a) + _vcvtn_u32_f32(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u32_f32) +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtnu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32")] - fn vcvtnq_u32_f32_(a: float32x4_t) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32" + )] + fn _vcvtnq_u32_f32(a: float32x4_t) -> int32x4_t; } - vcvtnq_u32_f32_(a) + _vcvtnq_u32_f32(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to even -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u64_f64) +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtnu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64")] - fn vcvtn_u64_f64_(a: float64x1_t) -> uint64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64" + )] + fn _vcvtn_u64_f64(a: float64x1_t) -> int64x1_t; } - vcvtn_u64_f64_(a) + _vcvtn_u64_f64(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u64_f64) +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtnu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64")] - fn vcvtnq_u64_f64_(a: float64x2_t) -> uint64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64" + )] + fn _vcvtnq_u64_f64(a: float64x2_t) -> int64x2_t; } - vcvtnq_u64_f64_(a) + _vcvtnq_u64_f64(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_u32_f32) +#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtnu))] +#[cfg_attr(test, assert_instr(fcvtns))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 { - #[allow(improper_ctypes)] +pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtnu.i32.f32")] - fn vcvtns_u32_f32_(a: f32) -> u32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i32.f32" + )] + fn _vcvtns_s32_f32(a: f32) -> i32; } - vcvtns_u32_f32_(a) + _vcvtns_s32_f32(a) } -/// Floating-point convert to unsigned integer, rounding to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_u64_f64) +#[doc = 
"Floating-point convert to signed integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtnu))] +#[cfg_attr(test, assert_instr(fcvtns))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 { - #[allow(improper_ctypes)] +pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtnu.i64.f64")] - fn vcvtnd_u64_f64_(a: f64) -> u64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtns.i64.f64" + )] + fn _vcvtnd_s64_f64(a: f64) -> i64; } - vcvtnd_u64_f64_(a) + _vcvtnd_s64_f64(a) } -/// Floating-point convert to unsigned integer, rounding toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u32_f32) +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtmu))] +#[cfg_attr(test, assert_instr(fcvtnu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32")] - fn vcvtm_u32_f32_(a: float32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i32.f32" + )] + fn _vcvtns_u32_f32(a: f32) -> i32; } - vcvtm_u32_f32_(a) + _vcvtns_u32_f32(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u32_f32) +#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtmu))] +#[cfg_attr(test, assert_instr(fcvtnu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32")] - fn vcvtmq_u32_f32_(a: float32x4_t) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtnu.i64.f64" + )] + fn _vcvtnd_u64_f64(a: f64) -> i64; } - vcvtmq_u32_f32_(a) + _vcvtnd_u64_f64(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u64_f64) 
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtmu))] +#[cfg_attr(test, assert_instr(fcvtps))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64")] - fn vcvtm_u64_f64_(a: float64x1_t) -> uint64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32" + )] + fn _vcvtp_s32_f32(a: float32x2_t) -> int32x2_t; } - vcvtm_u64_f64_(a) + _vcvtp_s32_f32(a) } -/// Floating-point convert to unsigned integer, rounding toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u64_f64) +#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtmu))] +#[cfg_attr(test, assert_instr(fcvtps))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64")] - fn vcvtmq_u64_f64_(a: float64x2_t) -> uint64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32" + )] + fn _vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t; } - vcvtmq_u64_f64_(a) + _vcvtpq_s32_f32(a) } -/// Floating-point convert to unsigned integer, rounding toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_u32_f32) +#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtmu))] +#[cfg_attr(test, assert_instr(fcvtps))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 { - #[allow(improper_ctypes)] +pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtmu.i32.f32")] - fn vcvtms_u32_f32_(a: f32) -> u32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64" + )] + fn _vcvtp_s64_f64(a: float64x1_t) -> int64x1_t; } - vcvtms_u32_f32_(a) + _vcvtp_s64_f64(a) } -/// Floating-point convert to unsigned integer, rounding toward minus infinity -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_u64_f64) +#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtmu))] +#[cfg_attr(test, assert_instr(fcvtps))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 { - #[allow(improper_ctypes)] +pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtmu.i64.f64")] - fn vcvtmd_u64_f64_(a: f64) -> u64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64" + )] + fn _vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t; } - vcvtmd_u64_f64_(a) + _vcvtpq_s64_f64(a) } -/// Floating-point convert to unsigned integer, rounding toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u32_f32) +#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtpu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32")] - fn vcvtp_u32_f32_(a: float32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32" + )] + fn _vcvtp_u32_f32(a: float32x2_t) -> int32x2_t; } - vcvtp_u32_f32_(a) + _vcvtp_u32_f32(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u32_f32) +#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtpu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32")] - fn vcvtpq_u32_f32_(a: float32x4_t) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32" + )] + fn _vcvtpq_u32_f32(a: float32x4_t) -> int32x4_t; } - vcvtpq_u32_f32_(a) + _vcvtpq_u32_f32(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u64_f64) +#[doc = "Floating-point convert 
to unsigned integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtpu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64")] - fn vcvtp_u64_f64_(a: float64x1_t) -> uint64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64" + )] + fn _vcvtp_u64_f64(a: float64x1_t) -> int64x1_t; } - vcvtp_u64_f64_(a) + _vcvtp_u64_f64(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u64_f64) +#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fcvtpu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64")] - fn vcvtpq_u64_f64_(a: float64x2_t) -> uint64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64" + )] + fn _vcvtpq_u64_f64(a: float64x2_t) -> int64x2_t; } - vcvtpq_u64_f64_(a) + _vcvtpq_u64_f64(a).as_unsigned() } -/// Floating-point convert to unsigned integer, rounding toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_u32_f32) +#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtpu))] +#[cfg_attr(test, assert_instr(fcvtps))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 { - #[allow(improper_ctypes)] +pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtpu.i32.f32")] - fn vcvtps_u32_f32_(a: f32) -> u32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i32.f32" + )] + fn _vcvtps_s32_f32(a: f32) -> i32; } - vcvtps_u32_f32_(a) + _vcvtps_s32_f32(a) } -/// Floating-point convert to unsigned integer, rounding toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_u64_f64) +#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtpu))] +#[cfg_attr(test, assert_instr(fcvtps))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 { - #[allow(improper_ctypes)] +pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fcvtpu.i64.f64")] - fn vcvtpd_u64_f64_(a: f64) -> u64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtps.i64.f64" + )] + fn _vcvtpd_s64_f64(a: f64) -> i64; } - vcvtpd_u64_f64_(a) + _vcvtpd_s64_f64(a) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64) +#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(dup, N = 1))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtpu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32]) +pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i32.f32" + )] + fn _vcvtps_u32_f32(a: f32) -> i32; + } + _vcvtps_u32_f32(a).as_unsigned() } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64) +#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(dup, N = 0))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtpu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t { - static_assert!(N == 0); - simd_shuffle!(a, a, [N as u32, N as u32]) +pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtpu.i64.f64" + )] + fn _vcvtpd_u64_f64(a: f64) -> i64; + } + _vcvtpd_u64_f64(a).as_unsigned() } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(dup, N = 1))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(ucvtf))] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupq_laneq_f64(a: float64x2_t) -> float64x2_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32]) +pub unsafe fn vcvts_f32_u32(a: u32) -> f32 { + a as f32 } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(dup, N = 0))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(ucvtf))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupq_lane_f64(a: float64x1_t) -> float64x2_t { - static_assert!(N == 0); - simd_shuffle!(a, a, [N as u32, N as u32]) -} +pub unsafe fn vcvtd_f64_u64(a: u64) -> f64 { + a as f64 +} -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 0))] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdup_lane_p64(a: poly64x1_t) -> poly64x1_t { - static_assert!(N == 0); - a +pub unsafe fn vcvts_n_f32_s32(a: i32) -> f32 { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.f32.i32" + )] + fn _vcvts_n_f32_s32(a: i32, n: i32) -> f32; + } + _vcvts_n_f32_s32(a, N) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 0))] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdup_lane_f64(a: float64x1_t) -> float64x1_t { - static_assert!(N == 0); - a +pub unsafe fn vcvtd_n_f64_s64(a: i64) -> f64 { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.f64.i64" + )] + fn _vcvtd_n_f64_s64(a: i64, n: i32) -> f64; + } + _vcvtd_n_f64_s64(a, N) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, 
assert_instr(nop, N = 1))] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t { - static_assert_uimm_bits!(N, 1); - transmute::<u64, poly64x1_t>(simd_extract!(a, N as u32)) +pub unsafe fn vcvts_n_f32_u32<const N: i32>(a: u32) -> f32 { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.f32.i32" + )] + fn _vcvts_n_f32_u32(a: i32, n: i32) -> f32; + } + _vcvts_n_f32_u32(a.as_signed(), N) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f64) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t { - static_assert_uimm_bits!(N, 1); - transmute::<f64, float64x1_t>(simd_extract!(a, N as u32)) +pub unsafe fn vcvtd_n_f64_u64<const N: i32>(a: u64) -> f64 { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.f64.i64" + )] + fn _vcvtd_n_f64_u64(a: i64, n: i32) -> f64; + } + _vcvtd_n_f64_u64(a.as_signed(), N) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 4))] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 { - static_assert_uimm_bits!(N, 3); - simd_extract!(a, N as u32) +pub unsafe fn vcvts_n_s32_f32<const N: i32>(a: f32) -> i32 { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f32" + )] + fn _vcvts_n_s32_f32(a: f32, n: i32) -> i32; + } + _vcvts_n_s32_f32(a, N) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 8))] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 { - static_assert_uimm_bits!(N, 4); - simd_extract!(a, N as u32) +pub unsafe fn 
vcvtd_n_s64_f64<const N: i32>(a: f64) -> i64 { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f64" + )] + fn _vcvtd_n_s64_f64(a: f64, n: i32) -> i64; + } + _vcvtd_n_s64_f64(a, N) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 { - static_assert_uimm_bits!(N, 2); - simd_extract!(a, N as u32) +pub unsafe fn vcvts_n_u32_f32<const N: i32>(a: f32) -> u32 { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f32" + )] + fn _vcvts_n_u32_f32(a: f32, n: i32) -> i32; + } + _vcvts_n_u32_f32(a, N).as_unsigned() } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 4))] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 { - static_assert_uimm_bits!(N, 3); - simd_extract!(a, N as u32) +pub unsafe fn vcvtd_n_u64_f64<const N: i32>(a: f64) -> u64 { + static_assert!(N >= 1 && N <= 64); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f64" + )] + fn _vcvtd_n_u64_f64(a: f64, n: i32) -> i64; + } + _vcvtd_n_u64_f64(a, N).as_unsigned() } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32) +#[doc = "Floating-point convert to signed integer, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtzs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 { - static_assert_uimm_bits!(N, 1); - simd_extract!(a, N as u32) +pub unsafe fn vcvts_s32_f32(a: f32) -> i32 { + a as i32 } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32) +#[doc = "Floating-point convert to signed integer, rounding toward zero"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_s64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtzs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 { - static_assert_uimm_bits!(N, 2); - simd_extract!(a, N as u32) +pub unsafe fn vcvtd_s64_f64(a: f64) -> i64 { + a as i64 } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64) +#[doc = "Floating-point convert to unsigned integer, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 0))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtzu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 { - static_assert!(N == 0); - simd_extract!(a, N as u32) +pub unsafe fn vcvts_u32_f32(a: f32) -> u32 { + a as u32 } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64) +#[doc = "Floating-point convert to unsigned integer, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtzu))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 { - static_assert_uimm_bits!(N, 1); - simd_extract!(a, N as u32) +pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 { + a as u64 } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8) +#[doc = "Floating-point convert to lower precision narrow, rounding to odd"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 4))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtxn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 { - static_assert_uimm_bits!(N, 3); - simd_extract!(a, N as u32) +pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64" + )] + fn _vcvtx_f32_f64(a: float64x2_t) -> float32x2_t; + } + _vcvtx_f32_f64(a) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8) +#[doc = "Floating-point convert to lower precision narrow, rounding to odd"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 8))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtxn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { - static_assert_uimm_bits!(N, 4); - simd_extract!(a, N as u32) +pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { + simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16) +#[doc = "Floating-point convert to lower precision narrow, rounding to odd"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(fcvtxn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vduph_lane_u16(a: uint16x4_t) -> u16 { - static_assert_uimm_bits!(N, 2); - simd_extract!(a, N as u32) +pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 { + simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16) +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 4))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vduph_laneq_u16(a: uint16x8_t) -> u16 { - static_assert_uimm_bits!(N, 3); - simd_extract!(a, N as u32) +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdiv_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_div(a, b) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32) +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdups_lane_u32(a: uint32x2_t) -> u32 { - static_assert_uimm_bits!(N, 1); - simd_extract!(a, N as u32) +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdivq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_div(a, b) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32) +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdups_laneq_u32(a: uint32x4_t) -> u32 { - static_assert_uimm_bits!(N, 
2); - simd_extract!(a, N as u32) +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + simd_div(a, b) +} + +#[doc = "Divide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fdiv))] +pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_div(a, b) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(test, assert_instr(sdot, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] +pub unsafe fn vdot_laneq_s32<const LANE: i32>( + a: int32x2_t, + b: int8x8_t, + c: int8x16_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vdot_s32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(test, assert_instr(sdot, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] +pub unsafe fn vdotq_laneq_s32<const LANE: i32>( + a: int32x4_t, + b: int8x16_t, + c: int8x16_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = transmute(c); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_s32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(test, assert_instr(udot, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] +pub unsafe fn vdot_laneq_u32<const LANE: i32>( + a: uint32x2_t, + b: uint8x8_t, + c: uint8x16_t, +) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vdot_u32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(test, assert_instr(udot, LANE = 0))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] +pub unsafe fn vdotq_laneq_u32<const LANE: i32>( + a: uint32x4_t, + b: uint8x16_t, + c: uint8x16_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = transmute(c); + let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_u32(a, b, transmute(c)) } -/// Set all vector lanes to the same value -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(nop, N = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 { +pub unsafe fn vdup_lane_f64<const N: i32>(a: float64x1_t) -> float64x1_t { static_assert!(N == 0); - simd_extract!(a, N as u32) + a } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] +#[cfg_attr(test, assert_instr(nop, N = 0))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 { - static_assert_uimm_bits!(N, 1); - simd_extract!(a, N as u32) +pub unsafe fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t { + static_assert!(N == 0); + a } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 4))] +#[cfg_attr(test, assert_instr(nop, N = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 { - static_assert_uimm_bits!(N, 3); - simd_extract!(a, N as u32) +pub unsafe fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t { + static_assert_uimm_bits!(N, 1); + transmute::<f64, float64x1_t>(simd_extract!(a, N as u32)) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 8))] +#[cfg_attr(test, assert_instr(nop, N = 1))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 { - static_assert_uimm_bits!(N, 4); - simd_extract!(a, N as u32) +pub unsafe fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t { + static_assert_uimm_bits!(N, 1); + transmute::<u64, poly64x1_t>(simd_extract!(a, N as u32)) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(nop, N = 4))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 { - static_assert_uimm_bits!(N, 2); +pub unsafe fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 { + static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(nop, N = 4))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 { +pub unsafe fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 { static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] +#[cfg_attr(test, assert_instr(nop, N = 4))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 { - static_assert_uimm_bits!(N, 1); +pub unsafe fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 { + static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(nop, N = 4))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 { - static_assert_uimm_bits!(N, 2); +pub unsafe fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 { + static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 0))] +#[cfg_attr(test, assert_instr(nop, N = 4))] #[rustc_legacy_const_generics(1)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupd_lane_f64(a: float64x1_t) -> f64 { - static_assert!(N == 0); +pub unsafe fn vdupb_lane_p8(a: poly8x8_t) -> p8 { + static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 1))] +#[cfg_attr(test, assert_instr(nop, N = 4))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdupd_laneq_f64(a: float64x2_t) -> f64 { - static_assert_uimm_bits!(N, 1); +pub unsafe fn vduph_laneq_p16(a: poly16x8_t) -> p16 { + static_assert_uimm_bits!(N, 3); simd_extract!(a, N as u32) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(nop, N = 8))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(N, 1); - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } +pub unsafe fn vdupb_laneq_s8(a: int8x16_t) -> i8 { + static_assert_uimm_bits!(N, 4); + simd_extract!(a, N as u32) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(nop, N = 8))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - static_assert_uimm_bits!(N, 1); - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } +pub unsafe fn vdupb_laneq_u8(a: uint8x16_t) -> u8 { + static_assert_uimm_bits!(N, 4); + simd_extract!(a, N as u32) } -/// Floating-point multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] +#[cfg_attr(test, assert_instr(nop, N = 
8))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { - simd_add(a, simd_mul(b, c)) +pub unsafe fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 { + static_assert_uimm_bits!(N, 4); + simd_extract!(a, N as u32) } -/// Floating-point multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - simd_add(a, simd_mul(b, c)) +pub unsafe fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 { + static_assert!(N == 0); + simd_extract!(a, N as u32) } -/// Signed multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlal_s8(a, b, c) +pub unsafe fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 { + static_assert!(N == 0); + simd_extract!(a, N as u32) } -/// Signed multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(test, assert_instr(nop, N = 0))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlal_s16(a, b, c) +pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 { + static_assert!(N == 0); + simd_extract!(a, N as u32) } -/// Signed multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(test, 
assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: int32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlal_s32(a, b, c) +pub unsafe fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t { + static_assert!(N == 0); + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Unsigned multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(test, assert_instr(dup, N = 0))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlal_u8(a, b, c) +pub unsafe fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t { + static_assert!(N == 0); + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Unsigned multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlal_u16(a, b, c) +pub unsafe fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(N, 1); + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Unsigned multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(test, assert_instr(dup, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlal_u32(a, b, c) +pub unsafe fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(N, 1); + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16) +#[doc = "Set all vector lanes to 
the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { - vmlal_high_s16(a, b, vdupq_n_s16(c)) +pub unsafe fn vdups_lane_f32(a: float32x2_t) -> f32 { + static_assert_uimm_bits!(N, 1); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2))] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { - vmlal_high_s32(a, b, vdupq_n_s32(c)) +pub unsafe fn vdupd_laneq_f64(a: float64x2_t) -> f64 { + static_assert_uimm_bits!(N, 1); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { - vmlal_high_u16(a, b, vdupq_n_u16(c)) +pub unsafe fn vdups_lane_s32(a: int32x2_t) -> i32 { + static_assert_uimm_bits!(N, 1); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2))] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { - vmlal_high_u32(a, b, vdupq_n_u32(c)) +pub unsafe fn vdupd_laneq_s64(a: int64x2_t) -> i64 { + static_assert_uimm_bits!(N, 1); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 { + static_assert_uimm_bits!(N, 1); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 { + static_assert_uimm_bits!(N, 1); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as 
u32])) +pub unsafe fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) } -/// Multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16)"] +#[doc = "## Safety"] +#[doc = " 
* Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(nop, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 { + static_assert_uimm_bits!(N, 2); + simd_extract!(a, N as u32) } -/// Floating-point multiply-subtract from accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { - simd_sub(a, simd_mul(b, c)) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3s.v16i8" + )] + fn _veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + _veor3q_s8(a, b, c) } -/// Floating-point multiply-subtract from accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f64) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - simd_sub(a, simd_mul(b, c)) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3s.v8i16" + )] + fn _veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + _veor3q_s16(a, b, c) } -/// Signed multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: int8x8_t = 
simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlsl_s8(a, b, c) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3s.v4i32" + )] + fn _veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + _veor3q_s32(a, b, c) } -/// Signed multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlsl_s16(a, b, c) -} - -/// Signed multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: int32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlsl_s32(a, b, c) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3s.v2i64" + )] + fn _veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + _veor3q_s64(a, b, c) } -/// Unsigned multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - vmlsl_u8(a, b, c) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3u.v16i8" + )] + fn _veor3q_u8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + _veor3q_u8(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Unsigned 
multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - vmlsl_u16(a, b, c) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3u.v8i16" + )] + fn _veor3q_u16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + _veor3q_u16(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Unsigned multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]); - vmlsl_u32(a, b, c) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3u.v4i32" + )] + fn _veor3q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + _veor3q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16) +#[doc = "Three-way exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { - vmlsl_high_s16(a, b, vdupq_n_s16(c)) +#[target_feature(enable = "neon,sha3")] +#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] +#[cfg_attr(test, assert_instr(eor3))] +pub unsafe fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.eor3u.v2i64" + )] + fn _veor3q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> 
int64x2_t; + } + _veor3q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2))] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { - vmlsl_high_s32(a, b, vdupq_n_s32(c)) +pub unsafe fn vextq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(N, 1); + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } } -/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] +#[cfg_attr(test, assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { - vmlsl_high_u16(a, b, vdupq_n_u16(c)) +pub unsafe fn vextq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(N, 1); + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } } -/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32) +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { - vmlsl_high_u32(a, b, vdupq_n_u32(c)) +#[cfg_attr(test, assert_instr(fmadd))] +pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.v1f64" + )] + fn _vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; + } + _vfma_f64(b, c, a) } -/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] 
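The `veor3q_*` family maps to the single EOR3 instruction (`a ^ b ^ c` in one operation) and is only available with the `sha3` target feature. A minimal caller sketch, assuming an AArch64 target with `sha3` enabled (`xor3` is a hypothetical helper name, not part of the diff):

```rust
// Three-way XOR of three 16-byte blocks via EOR3; useful in hash/cipher kernels.
#[cfg(all(target_arch = "aarch64", target_feature = "sha3"))]
fn xor3(a: &[u8; 16], b: &[u8; 16], c: &[u8; 16]) -> [u8; 16] {
    use core::arch::aarch64::*;
    let mut out = [0u8; 16];
    // Safety: all pointers come from properly sized, initialized arrays.
    unsafe {
        let r = veor3q_u8(vld1q_u8(a.as_ptr()), vld1q_u8(b.as_ptr()), vld1q_u8(c.as_ptr()));
        vst1q_u8(out.as_mut_ptr(), r);
    }
    out
}
```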
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vfma_lane_f32<const LANE: i32>( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vfma_laneq_f32<const LANE: i32>( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { +pub unsafe fn vfmaq_lane_f32<const LANE: i32>( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { +pub unsafe fn vfmaq_laneq_f32<const LANE: i32>( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vfmaq_laneq_f64<const LANE: i32>( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, +) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); + vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vfma_lane_f64<const LANE: i32>( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, +) -> float64x1_t { + static_assert!(LANE == 0); + vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t { +pub unsafe fn vfma_laneq_f64<const LANE: i32>( + a: float64x1_t, + b: float64x1_t, + c: float64x2_t, +) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) + vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
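Note the argument order in the `vfma_*` bodies: LLVM's `llvm.fma.*` computes `x * y + z`, so `_vfma_f64(b, c, a)` yields `a + b * c`, and the lane variants yield `a + b * c[LANE]` with a single rounding. A quick sketch of the lane semantics (hypothetical helper name, AArch64 only):

```rust
// out[i] == a[i] + b[i] * c[1], fused (one rounding step).
#[cfg(target_arch = "aarch64")]
fn fma_by_lane1(a: &[f32; 2], b: &[f32; 2], c: &[f32; 2]) -> [f32; 2] {
    use core::arch::aarch64::*;
    let mut out = [0.0f32; 2];
    // Safety: loads and stores use valid, initialized 2-element arrays.
    unsafe {
        let r = vfma_lane_f32::<1>(vld1_f32(a.as_ptr()), vld1_f32(b.as_ptr()), vld1_f32(c.as_ptr()));
        vst1_f32(out.as_mut_ptr(), r);
    }
    out
}
```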
-/// Multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32) +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] -#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(test, assert_instr(fmadd))] +pub unsafe fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t { + vfma_f64(a, b, vdup_n_f64(c)) }
-/// Extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - let c: int8x8_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +pub unsafe fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.f64" + )] + fn _vfmad_lane_f64(a: f64, b: f64, c: f64) -> f64; + } + static_assert!(LANE == 0); + let c: f64 = simd_extract!(c, LANE as u32); + _vfmad_lane_f64(b, c, a) }
-/// Extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32) +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(xtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - let c: int16x4_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(test, assert_instr(fmla))] +pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.v2f64" + )] + fn _vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + } + _vfmaq_f64(b, c, a) }
-/// Extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - let c: int32x2_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3]) +pub unsafe fn vfmaq_lane_f64<const LANE: i32>( + a: float64x2_t, + b: float64x2_t, + c: float64x1_t, +) -> float64x2_t { + static_assert!(LANE == 0); + vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
-/// Extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16) +#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(xtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - let c: uint8x8_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(test, assert_instr(fmla))] +pub unsafe fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t { + vfmaq_f64(a, b, vdupq_n_f64(c)) }
-/// Extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - let c: uint16x4_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) +pub unsafe fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.f32" + )] + fn _vfmas_lane_f32(a: f32, b: f32, c: f32) -> f32; + } + static_assert_uimm_bits!(LANE, 1); + let c: f32 = simd_extract!(c, LANE as u32); + _vfmas_lane_f32(b, c, a) }
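The scalar-by-lane forms (`vfmas_lane_f32` and friends) route a plain `f32`/`f64` FMA through `llvm.fma.f32`/`llvm.fma.f64` after extracting the lane. Sketch under the same assumptions as above (hypothetical helper name):

```rust
// Computes a + b * c[0] as a single fused operation on AArch64.
#[cfg(target_arch = "aarch64")]
fn scalar_fma(a: f32, b: f32, c: &[f32; 2]) -> f32 {
    use core::arch::aarch64::*;
    // Safety: vld1_f32 reads two f32s from a valid, initialized array.
    unsafe { vfmas_lane_f32::<0>(a, b, vld1_f32(c.as_ptr())) }
}
```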
-/// Extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(xtn2))] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - let c: uint32x2_t = simd_cast(b); - simd_shuffle!(a, c, [0, 1, 2, 3]) +pub unsafe fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.f32" + )] + fn _vfmas_laneq_f32(a: f32, b: f32, c: f32) -> f32; + } + static_assert_uimm_bits!(LANE, 2); + let c: f32 = simd_extract!(c, LANE as u32); + _vfmas_laneq_f32(b, c, a) }
-/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s64) +#[doc = "Floating-point fused multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(neg))] +#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t { - simd_neg(a) +pub unsafe fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fma.f64" + )] + fn _vfmad_laneq_f64(a: f64, b: f64, c: f64) -> f64; + } + static_assert_uimm_bits!(LANE, 1); + let c: f64 = simd_extract!(c, LANE as u32); + _vfmad_laneq_f64(b, c, a) }
-/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s64) +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(neg))] +#[cfg_attr(test, assert_instr(fmsub))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t { - simd_neg(a) +pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + let b: float64x1_t = simd_neg(b); + vfma_f64(a, b, c) }
-/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegd_s64) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(neg))] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vnegd_s64(a: i64) -> i64 { - a.wrapping_neg() +pub unsafe fn vfms_lane_f32<const LANE: i32>( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
-/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fneg))] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t { - simd_neg(a) +pub unsafe fn vfms_laneq_f32<const LANE: i32>( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
-/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f64) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fneg))] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t { - simd_neg(a) +pub unsafe fn vfmsq_lane_f32<const LANE: i32>( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
-/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqneg))] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v1i64")] - fn vqneg_s64_(a: int64x1_t) -> int64x1_t; - } - vqneg_s64_(a) +pub unsafe fn vfmsq_laneq_f32<const LANE: i32>( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
-/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s64) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqneg))] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v2i64")] - fn vqnegq_s64_(a: int64x2_t) -> int64x2_t; - } - vqnegq_s64_(a) +pub unsafe fn vfmsq_laneq_f64<const LANE: i32>( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, +) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); + vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
-/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegb_s8) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqneg))] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqnegb_s8(a: i8) -> i8 { - simd_extract!(vqneg_s8(vdup_n_s8(a)), 0) +pub unsafe fn vfms_lane_f64<const LANE: i32>( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, +) -> float64x1_t { + static_assert!(LANE == 0); + vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
-/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqneg))] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqnegh_s16(a: i16) -> i16 { - simd_extract!(vqneg_s16(vdup_n_s16(a)), 0) +pub unsafe fn vfms_laneq_f64<const LANE: i32>( + a: float64x1_t, + b: float64x1_t, + c: float64x2_t, +) -> float64x1_t { + static_assert_uimm_bits!(LANE, 1); + vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
-/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32) +#[doc = "Floating-point fused Multiply-subtract to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqneg))] +#[cfg_attr(test, assert_instr(fmsub))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqnegs_s32(a: i32) -> i32 { - simd_extract!(vqneg_s32(vdup_n_s32(a)), 0) +pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t { + vfms_f64(a, b, vdup_n_f64(c)) }
-/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64) +#[doc = "Floating-point fused multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqneg))] +#[cfg_attr(test, assert_instr(fmls))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqnegd_s64(a: i64) -> i64 { - simd_extract!(vqneg_s64(vdup_n_s64(a)), 0) +pub unsafe fn vfmsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + let b: float64x2_t = simd_neg(b); + vfmaq_f64(a, b, c) }
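The `vfms_*` bodies reuse the FMA path with a negated `b`, so `vfms_f64(a, b, c)` computes `a - b * c`. A small check of that identity (hypothetical helper name, AArch64 only):

```rust
// Returns a - b * c, fused; mirrors the simd_neg-then-vfma pattern above.
#[cfg(target_arch = "aarch64")]
fn fms_scalar(a: f64, b: f64, c: f64) -> f64 {
    use core::arch::aarch64::*;
    let mut out = 0.0f64;
    // Safety: all vectors are built from scalars; the store target is valid.
    unsafe {
        let r = vfms_f64(vdup_n_f64(a), vdup_n_f64(b), vdup_n_f64(c));
        vst1_f64(&mut out, r);
    }
    out
}
```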
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] +#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 { - let a: int8x8_t = vdup_n_s8(a); - let b: int8x8_t = vdup_n_s8(b); - simd_extract!(vqsub_s8(a, b), 0) +pub unsafe fn vfmsq_lane_f64<const LANE: i32>( + a: float64x2_t, + b: float64x2_t, + c: float64x1_t, +) -> float64x2_t { + static_assert!(LANE == 0); + vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16) +#[doc = "Floating-point fused Multiply-subtract to accumulator (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] +#[cfg_attr(test, assert_instr(fmls))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 { - let a: int16x4_t = vdup_n_s16(a); - let b: int16x4_t = vdup_n_s16(b); - simd_extract!(vqsub_s16(a, b), 0) +pub unsafe fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t { + vfmsq_f64(a, b, vdupq_n_f64(c)) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqsub))] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 { - let a: uint8x8_t = vdup_n_u8(a); - let b: uint8x8_t = vdup_n_u8(b); - simd_extract!(vqsub_u8(a, b), 0) +pub unsafe fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 { + vfmas_lane_f32::<LANE>(a, -b, c) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqsub))] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 { - let a: uint16x4_t = vdup_n_u16(a); - let b: uint16x4_t = vdup_n_u16(b); - simd_extract!(vqsub_u16(a, b), 0) +pub unsafe fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 { + vfmas_laneq_f32::<LANE>(a, -b, c) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_u32) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqsub))] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.i32")] - fn vqsubs_u32_(a: u32, b: u32) -> u32; - } - vqsubs_u32_(a, b) +pub unsafe fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 { + vfmad_lane_f64::<LANE>(a, -b, c) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_u64) +#[doc = "Floating-point fused multiply-subtract to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqsub))] +#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.i64")] - fn vqsubd_u64_(a: u64, b: u64) -> u64; - } - vqsubd_u64_(a, b) +pub unsafe fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 { + vfmad_laneq_f64::<LANE>(a, -b, c) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.i32")] - fn vqsubs_s32_(a: i32, b: i32) -> i32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64" + )] + fn _vld1_f64_x2(a: *const f64) -> float64x1x2_t; } - vqsubs_s32_(a, b) + _vld1_f64_x2(a) }
-/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_s64) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.i64")] - fn vqsubd_s64_(a: i64, b: i64) -> i64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64" + )] + fn _vld1_f64_x3(a: *const f64) -> float64x1x3_t; } - vqsubd_s64_(a, b) + _vld1_f64_x3(a) }
-/// Reverse bit order -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_s8) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rbit))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rbit.v8i8")] - fn vrbit_s8_(a: int8x8_t) -> int8x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64" + )] + fn _vld1_f64_x4(a: *const f64) -> float64x1x4_t; } - vrbit_s8_(a) + _vld1_f64_x4(a) }
-/// Reverse bit order -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_s8) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rbit))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rbit.v16i8")] - fn vrbitq_s8_(a: int8x16_t) -> int8x16_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64" + )] + fn _vld1q_f64_x2(a: *const f64) -> float64x2x2_t; } - vrbitq_s8_(a) + _vld1q_f64_x2(a) }
-/// Reverse bit order -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rbit))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t { - transmute(vrbit_s8(transmute(a))) +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64" + )] + fn _vld1q_f64_x3(a: *const f64) -> float64x2x3_t; + } + _vld1q_f64_x3(a) }
-/// Reverse bit order -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rbit))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t { - transmute(vrbitq_s8(transmute(a))) +#[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64" + )] + fn _vld1q_f64_x4(a: *const f64) -> float64x2x4_t; + } + _vld1q_f64_x4(a) }
-/// Reverse bit order -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rbit))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t { - transmute(vrbit_s8(transmute(a))) +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64" + )] + fn _vld2_dup_f64(ptr: *const f64) -> float64x1x2_t; + } + _vld2_dup_f64(a as _) }
-/// Reverse bit order -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rbit))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { - transmute(vrbitq_s8(transmute(a))) +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64" + )] + fn _vld2q_dup_f64(ptr: *const f64) -> float64x2x2_t; + } + _vld2q_dup_f64(a as _) }
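`vld1_f64_x2/_x3/_x4` load two, three, or four consecutive registers from one pointer without de-interleaving. Usage sketch (hypothetical helper name; assumes an AArch64 target):

```rust
// Loads src[0] and src[1] into a float64x1x2_t pair in one intrinsic call.
#[cfg(target_arch = "aarch64")]
fn load_pair(src: &[f64; 2]) -> (f64, f64) {
    use core::arch::aarch64::*;
    // Safety: the pointer covers exactly the two f64s the intrinsic reads.
    unsafe {
        let v = vld1_f64_x2(src.as_ptr());
        (vget_lane_f64::<0>(v.0), vget_lane_f64::<0>(v.1))
    }
}
```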
"llvm.rint.v2f32")] - fn vrndx_f32_(a: float32x2_t) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64" + )] + fn _vld2q_dup_s64(ptr: *const i64) -> int64x2x2_t; } - vrndx_f32_(a) + _vld2q_dup_s64(a as _) } -/// Floating-point round to integral exact, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.rint.v4f32")] - fn vrndxq_f32_(a: float32x4_t) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64" + )] + fn _vld2_f64(ptr: *const float64x1_t) -> float64x1x2_t; } - vrndxq_f32_(a) + _vld2_f64(a as _) } -/// Floating-point round to integral exact, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintx))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +pub unsafe fn vld2_lane_f64(a: *const f64, b: float64x1x2_t) -> float64x1x2_t { + static_assert!(LANE == 0); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.rint.v1f64")] - fn vrndx_f64_(a: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8" + )] + fn _vld2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t; } - vrndx_f64_(a) + _vld2_lane_f64(b.0, b.1, LANE as i64, a as _) } -/// Floating-point round to integral exact, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintx))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld2_lane_s64(a: *const i64, b: int64x1x2_t) -> 
int64x1x2_t { + static_assert!(LANE == 0); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.rint.v2f64")] - fn vrndxq_f64_(a: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v1i64.p0i8" + )] + fn _vld2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *const i8) -> int64x1x2_t; } - vrndxq_f64_(a) + _vld2_lane_s64(b.0, b.1, LANE as i64, a as _) } -/// Floating-point round to integral, to nearest with ties to away -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinta))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.round.v2f32")] - fn vrnda_f32_(a: float32x2_t) -> float32x2_t; - } - vrnda_f32_(a) +pub unsafe fn vld2_lane_p64(a: *const p64, b: poly64x1x2_t) -> poly64x1x2_t { + static_assert!(LANE == 0); + transmute(vld2_lane_s64::(transmute(a), transmute(b))) } -/// Floating-point round to integral, to nearest with ties to away -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinta))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.round.v4f32")] - fn vrndaq_f32_(a: float32x4_t) -> float32x4_t; - } - vrndaq_f32_(a) +pub unsafe fn vld2_lane_u64(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t { + static_assert!(LANE == 0); + transmute(vld2_lane_s64::(transmute(a), transmute(b))) } -/// Floating-point round to integral, to nearest with ties to away -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinta))] +#[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = 
"llvm.round.v1f64")] - fn vrnda_f64_(a: float64x1_t) -> float64x1_t; - } - vrnda_f64_(a) +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t { + transmute(vld2q_dup_s64(transmute(a))) } -/// Floating-point round to integral, to nearest with ties to away -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinta))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.round.v2f64")] - fn vrndaq_f64_(a: float64x2_t) -> float64x2_t; - } - vrndaq_f64_(a) +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t { + transmute(vld2q_dup_s64(transmute(a))) } -/// Floating-point round to integral, to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frintn.v1f64")] - fn vrndn_f64_(a: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64" + )] + fn _vld2q_f64(ptr: *const float64x2_t) -> float64x2x2_t; } - vrndn_f64_(a) + _vld2q_f64(a as _) } -/// Floating-point round to integral, to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintn))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frintn.v2f64")] - fn vrndnq_f64_(a: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64" + )] + fn _vld2q_s64(ptr: *const int64x2_t) -> int64x2x2_t; } - vrndnq_f64_(a) + _vld2q_s64(a as _) } 
-/// Floating-point round to integral, to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintn))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndns_f32(a: f32) -> f32 { - #[allow(improper_ctypes)] +pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -> float64x2x2_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.roundeven.f32")] - fn vrndns_f32_(a: f32) -> f32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8" + )] + fn _vld2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8) + -> float64x2x2_t; } - vrndns_f32_(a) + _vld2q_lane_f64(b.0, b.1, LANE as i64, a as _) } -/// Floating-point round to integral, toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintm))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> int8x16x2_t { + static_assert_uimm_bits!(LANE, 4); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.floor.v2f32")] - fn vrndm_f32_(a: float32x2_t) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8" + )] + fn _vld2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t; } - vrndm_f32_(a) + _vld2q_lane_s8(b.0, b.1, LANE as i64, a as _) } -/// Floating-point round to integral, toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintm))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] +pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) -> int64x2x2_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.floor.v4f32")] - 
fn vrndmq_f32_(a: float32x4_t) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8" + )] + fn _vld2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t; } - vrndmq_f32_(a) + _vld2q_lane_s64(b.0, b.1, LANE as i64, a as _) } -/// Floating-point round to integral, toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintm))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.floor.v1f64")] - fn vrndm_f64_(a: float64x1_t) -> float64x1_t; - } - vrndm_f64_(a) +pub unsafe fn vld2q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x2_t) -> poly64x2x2_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b))) } -/// Floating-point round to integral, toward minus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintm))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.floor.v2f64")] - fn vrndmq_f64_(a: float64x2_t) -> float64x2_t; - } - vrndmq_f64_(a) +pub unsafe fn vld2q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x2_t) -> uint8x16x2_t { + static_assert_uimm_bits!(LANE, 4); + transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b))) } -/// Floating-point round to integral, toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintp))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.ceil.v2f32")] - fn vrndp_f32_(a: float32x2_t) -> float32x2_t; - } - vrndp_f32_(a) +pub unsafe fn vld2q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x2_t) -> 
uint64x2x2_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b))) } -/// Floating-point round to integral, toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintp))] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.ceil.v4f32")] - fn vrndpq_f32_(a: float32x4_t) -> float32x4_t; - } - vrndpq_f32_(a) +pub unsafe fn vld2q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x2_t) -> poly8x16x2_t { + static_assert_uimm_bits!(LANE, 4); + transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b))) } -/// Floating-point round to integral, toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintp))] +#[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.ceil.v1f64")] - fn vrndp_f64_(a: float64x1_t) -> float64x1_t; - } - vrndp_f64_(a) +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t { + transmute(vld2q_s64(transmute(a))) } -/// Floating-point round to integral, toward plus infinity -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.ceil.v2f64")] - fn vrndpq_f64_(a: float64x2_t) -> float64x2_t; - } - vrndpq_f64_(a) +#[cfg_attr(test, assert_instr(ld2))] +pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t { + transmute(vld2q_s64(transmute(a))) } -/// Floating-point round to integral, toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f64)"] +#[doc = 
"## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintz))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.trunc.v2f32")] - fn vrnd_f32_(a: float32x2_t) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64" + )] + fn _vld3_dup_f64(ptr: *const f64) -> float64x1x3_t; } - vrnd_f32_(a) + _vld3_dup_f64(a as _) } -/// Floating-point round to integral, toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintz))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.trunc.v4f32")] - fn vrndq_f32_(a: float32x4_t) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64" + )] + fn _vld3q_dup_f64(ptr: *const f64) -> float64x2x3_t; } - vrndq_f32_(a) + _vld3q_dup_f64(a as _) } -/// Floating-point round to integral, toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintz))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.trunc.v1f64")] - fn vrnd_f64_(a: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64" + )] + fn _vld3q_dup_s64(ptr: *const i64) -> int64x2x3_t; } - vrnd_f64_(a) + _vld3q_dup_s64(a as _) } -/// Floating-point round to integral, toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, 
assert_instr(frintz))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.trunc.v2f64")] - fn vrndq_f64_(a: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64" + )] + fn _vld3_f64(ptr: *const float64x1_t) -> float64x1x3_t; } - vrndq_f64_(a) + _vld3_f64(a as _) } -/// Floating-point round to integral, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinti))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) -> float64x1x3_t { + static_assert!(LANE == 0); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.nearbyint.v2f32")] - fn vrndi_f32_(a: float32x2_t) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8" + )] + fn _vld3_lane_f64( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + n: i64, + ptr: *const i8, + ) -> float64x1x3_t; } - vrndi_f32_(a) + _vld3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Floating-point round to integral, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinti))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.nearbyint.v4f32")] - fn vrndiq_f32_(a: float32x4_t) -> float32x4_t; - } - vrndiq_f32_(a) +pub unsafe fn vld3_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x3_t) -> poly64x1x3_t { + static_assert!(LANE == 0); + transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b))) }
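// A minimal usage sketch (an editorial illustration, not part of the generated
// diff): LANE on the lane-indexed loads above is a const generic (exposed
// through rustc_legacy_const_generics), supplied with turbofish syntax and
// validated at compile time by static_assert!/static_assert_uimm_bits!.
// Assumes an aarch64 target with NEON; demo_lane_load itself is hypothetical.
#[cfg(target_arch = "aarch64")]
fn demo_lane_load() {
    use core::arch::aarch64::*;
    let init = [0.0f64; 4];
    let src = [7.0f64, 8.0];
    // SAFETY: every pointer below is valid for the number of elements read.
    unsafe {
        let regs = float64x2x2_t(vld1q_f64(init.as_ptr()), vld1q_f64(init[2..].as_ptr()));
        // Refill lane 1 of each register from the two f64 values at `src`.
        let loaded = vld2q_lane_f64::<1>(src.as_ptr(), regs);
        assert_eq!(vgetq_lane_f64::<1>(loaded.0), 7.0);
        assert_eq!(vgetq_lane_f64::<1>(loaded.1), 8.0);
    }
}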
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinti))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +pub unsafe fn vld3_lane_s64(a: *const i64, b: int64x1x3_t) -> int64x1x3_t { + static_assert!(LANE == 0); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.nearbyint.v1f64")] - fn vrndi_f64_(a: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8" + )] + fn _vld3_lane_s64( + a: int64x1_t, + b: int64x1_t, + c: int64x1_t, + n: i64, + ptr: *const i8, + ) -> int64x1x3_t; } - vrndi_f64_(a) + _vld3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Floating-point round to integral, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frinti))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.nearbyint.v2f64")] - fn vrndiq_f64_(a: float64x2_t) -> float64x2_t; - } - vrndiq_f64_(a) +pub unsafe fn vld3_lane_u64(a: *const u64, b: uint64x1x3_t) -> uint64x1x3_t { + static_assert!(LANE == 0); + transmute(vld3_lane_s64::(transmute(a), transmute(b))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_s8) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] +#[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 { - let a: int8x8_t = vdup_n_s8(a); - let b: int8x8_t = vdup_n_s8(b); - simd_extract!(vqadd_s8(a, b), 0) +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t { + transmute(vld3q_dup_s64(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 { - let a: int16x4_t = vdup_n_s16(a); - let b: int16x4_t 
= vdup_n_s16(b); - simd_extract!(vqadd_s16(a, b), 0) +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t { + transmute(vld3q_dup_s64(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 { - let a: uint8x8_t = vdup_n_u8(a); - let b: uint8x8_t = vdup_n_u8(b); - simd_extract!(vqadd_u8(a, b), 0) +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64" + )] + fn _vld3q_f64(ptr: *const float64x2_t) -> float64x2x3_t; + } + _vld3q_f64(a as _) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 { - let a: uint16x4_t = vdup_n_u16(a); - let b: uint16x4_t = vdup_n_u16(b); - simd_extract!(vqadd_u16(a, b), 0) +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64" + )] + fn _vld3q_s64(ptr: *const int64x2_t) -> int64x2x3_t; + } + _vld3q_s64(a as _) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_u32) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqadd))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { - #[allow(improper_ctypes)] +pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -> float64x2x3_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.i32")] - fn vqadds_u32_(a: u32, b: u32) -> u32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8" + )] + fn _vld3q_lane_f64( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + n: i64, + ptr: *const i8, + ) -> float64x2x3_t; } - vqadds_u32_(a, b) + _vld3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) }
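// A minimal usage sketch (an editorial illustration, not part of the generated
// diff): vld3q_s64 above de-interleaves [a0, b0, c0, a1, b1, c1] into three
// int64x2_t registers, one per structure member. Assumes an aarch64 target
// with NEON; demo_vld3q_s64 itself is hypothetical.
#[cfg(target_arch = "aarch64")]
fn demo_vld3q_s64() {
    use core::arch::aarch64::*;
    let interleaved = [1i64, 2, 3, 10, 20, 30]; // two 3-member structures
    // SAFETY: `interleaved` is valid for reading six i64 values.
    unsafe {
        let regs = vld3q_s64(interleaved.as_ptr());
        assert_eq!(vgetq_lane_s64::<0>(regs.0), 1); // member 0, structure 0
        assert_eq!(vgetq_lane_s64::<1>(regs.0), 10); // member 0, structure 1
        assert_eq!(vgetq_lane_s64::<0>(regs.2), 3); // member 2, structure 0
    }
}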
-/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_u64) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqadd))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.i64")] - fn vqaddd_u64_(a: u64, b: u64) -> u64; - } - vqaddd_u64_(a, b) +pub unsafe fn vld3q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x3_t) -> poly64x2x3_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { - #[allow(improper_ctypes)] +pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> int8x16x3_t { + static_assert_uimm_bits!(LANE, 4); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.i32")] - fn vqadds_s32_(a: i32, b: i32) -> i32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8" + )] + fn _vld3q_lane_s8( + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + n: i64, + ptr: *const i8, + ) -> int8x16x3_t; } - vqadds_s32_(a, b) + _vld3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_s64) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { - #[allow(improper_ctypes)] +pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) -> int64x2x3_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.i64")] - fn vqaddd_s64_(a: i64, b: i64) -> i64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8" + )] + fn _vld3q_lane_s64( + a: int64x2_t, + b: int64x2_t, + c: int64x2_t, + n: i64, + ptr: *const i8, + ) -> 
int64x2x3_t; } - vqaddd_s64_(a, b) + _vld3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld1))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64")] - fn vld1_f64_x2_(a: *const f64) -> float64x1x2_t; - } - vld1_f64_x2_(a) +pub unsafe fn vld3q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x3_t) -> uint8x16x3_t { + static_assert_uimm_bits!(LANE, 4); + transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld1))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64")] - fn vld1q_f64_x2_(a: *const f64) -> float64x2x2_t; - } - vld1q_f64_x2_(a) +pub unsafe fn vld3q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x3_t) -> uint64x2x3_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld1))] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64")] - fn vld1_f64_x3_(a: *const f64) -> float64x1x3_t; - } - vld1_f64_x3_(a) +pub unsafe fn vld3q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x3_t) -> poly8x16x3_t { + static_assert_uimm_bits!(LANE, 4); + transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b))) } -/// Load multiple 
single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld1))] +#[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64")] - fn vld1q_f64_x3_(a: *const f64) -> float64x2x3_t; - } - vld1q_f64_x3_(a) +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t { + transmute(vld3q_s64(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64")] - fn vld1_f64_x4_(a: *const f64) -> float64x1x4_t; - } - vld1_f64_x4_(a) +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t { + transmute(vld3q_s64(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld1))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64")] - fn vld1q_f64_x4_(a: *const f64) -> float64x2x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64" + )] + fn _vld4_dup_f64(ptr: *const f64) -> float64x1x4_t; } - vld1q_f64_x4_(a) + _vld4_dup_f64(a as _) }
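// A minimal usage sketch (an editorial illustration, not part of the generated
// diff): the ld2r/ld3r/ld4r "dup" loads read one whole structure and broadcast
// each member to every lane of its register; with float64x1x4_t each register
// has a single lane. Assumes an aarch64 target with NEON; demo_vld4_dup_f64
// itself is hypothetical.
#[cfg(target_arch = "aarch64")]
fn demo_vld4_dup_f64() {
    use core::arch::aarch64::*;
    let s = [1.5f64, 2.5, 3.5, 4.5]; // one 4-member structure
    // SAFETY: `s` is valid for reading four f64 values.
    unsafe {
        let regs = vld4_dup_f64(s.as_ptr());
        // Each member lands in its own register.
        assert_eq!(vget_lane_f64::<0>(regs.0), 1.5);
        assert_eq!(vget_lane_f64::<0>(regs.3), 4.5);
    }
}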
-/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64")] - fn vld2q_s64_(ptr: *const int64x2_t) -> int64x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64" + )] + fn _vld4q_dup_f64(ptr: *const f64) -> float64x2x4_t; } - vld2q_s64_(a as _) + _vld4q_dup_f64(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t { - transmute(vld2q_s64(transmute(a))) +pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64" + )] + fn _vld4q_dup_s64(ptr: *const i64) -> int64x2x4_t; + } + _vld4q_dup_s64(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld2))] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t { - transmute(vld2q_s64(transmute(a))) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64" + )] + fn _vld4_f64(ptr: *const float64x1_t) -> float64x1x4_t; + } + _vld4_f64(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = 
"1.59.0")] -pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_lane_f64(a: *const f64, b: float64x1x4_t) -> float64x1x4_t { + static_assert!(LANE == 0); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64")] - fn vld2_f64_(ptr: *const float64x1_t) -> float64x1x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8" + )] + fn _vld4_lane_f64( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + d: float64x1_t, + n: i64, + ptr: *const i8, + ) -> float64x1x4_t; } - vld2_f64_(a as _) + _vld4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_lane_s64(a: *const i64, b: int64x1x4_t) -> int64x1x4_t { + static_assert!(LANE == 0); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64")] - fn vld2q_f64_(ptr: *const float64x2_t) -> float64x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8" + )] + fn _vld4_lane_s64( + a: int64x1_t, + b: int64x1_t, + c: int64x1_t, + d: int64x1_t, + n: i64, + ptr: *const i8, + ) -> int64x1x4_t; } - vld2q_f64_(a as _) + _vld4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64")] - fn vld2q_dup_s64_(ptr: *const i64) -> int64x2x2_t; - } - vld2q_dup_s64_(a as _) +pub unsafe fn vld4_lane_p64(a: *const p64, b: poly64x1x4_t) -> poly64x1x4_t { + static_assert!(LANE == 0); + transmute(vld4_lane_s64::(transmute(a), transmute(b))) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64) +#[doc = "Load multiple 4-element 
structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t { - transmute(vld2q_dup_s64(transmute(a))) +pub unsafe fn vld4_lane_u64(a: *const u64, b: uint64x1x4_t) -> uint64x1x4_t { + static_assert!(LANE == 0); + transmute(vld4_lane_s64::(transmute(a), transmute(b))) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t { - transmute(vld2q_dup_s64(transmute(a))) +pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t { + transmute(vld4q_dup_s64(transmute(a))) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f64) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64")] - fn vld2_dup_f64_(ptr: *const f64) -> float64x1x2_t; - } - vld2_dup_f64_(a as _) +pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t { + transmute(vld4q_dup_s64(transmute(a))) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64")] - fn vld2q_dup_f64_(ptr: *const 
f64) -> float64x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64" + )] + fn _vld4q_f64(ptr: *const float64x2_t) -> float64x2x4_t; } - vld2q_dup_f64_(a as _) + _vld4q_f64(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> int8x16x2_t { - static_assert_uimm_bits!(LANE, 4); - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8")] - fn vld2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64" + )] + fn _vld4q_s64(ptr: *const int64x2_t) -> int64x2x4_t; } - vld2q_lane_s8_(b.0, b.1, LANE as i64, a as _) + _vld4q_s64(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) -> int64x2x2_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> int8x16x4_t { + static_assert_uimm_bits!(LANE, 4); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8")] - fn vld2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8" + )] + fn _vld4q_lane_s8( + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: int8x16_t, + n: i64, + ptr: *const i8, + ) -> int8x16x4_t; } - vld2q_lane_s64_(b.0, b.1, LANE as i64, a as _) + _vld4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x2_t) -> poly64x1x2_t { - static_assert!(LANE == 0); - transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b))) +pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) -> int64x2x4_t { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8" + )] + fn _vld4q_lane_s64( + a: int64x2_t, + b: int64x2_t, + c: int64x2_t, + d: int64x2_t, + n: i64, + ptr: *const i8, + ) -> int64x2x4_t; + } + _vld4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x2_t) -> poly64x2x2_t { +pub unsafe fn vld4q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x4_t) -> poly64x2x4_t { static_assert_uimm_bits!(LANE, 1); - transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b))) + transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u8) +#[doc = "Load multiple 4-element structures to 
four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_u8(a: *const u8, b: uint8x16x2_t) -> uint8x16x2_t { +pub unsafe fn vld4q_lane_u8(a: *const u8, b: uint8x16x4_t) -> uint8x16x4_t { static_assert_uimm_bits!(LANE, 4); - transmute(vld2q_lane_s8::(transmute(a), transmute(b))) + transmute(vld4q_lane_s8::(transmute(a), transmute(b))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_u64(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t { - static_assert!(LANE == 0); - transmute(vld2_lane_s64::(transmute(a), transmute(b))) +pub unsafe fn vld4q_lane_u64(a: *const u64, b: uint64x2x4_t) -> uint64x2x4_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld4q_lane_s64::(transmute(a), transmute(b))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_u64(a: *const u64, b: uint64x2x2_t) -> uint64x2x2_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld2q_lane_s64::(transmute(a), transmute(b))) +pub unsafe fn vld4q_lane_p8(a: *const p8, b: poly8x16x4_t) -> poly8x16x4_t { + static_assert_uimm_bits!(LANE, 4); + transmute(vld4q_lane_s8::(transmute(a), transmute(b))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_p8(a: *const p8, b: poly8x16x2_t) -> poly8x16x2_t { - static_assert_uimm_bits!(LANE, 4); - transmute(vld2q_lane_s8::(transmute(a), transmute(b))) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, 
assert_instr(ld4))] +pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t { + transmute(vld4q_s64(transmute(a))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) -> float64x1x2_t { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8")] - fn vld2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t; - } - vld2_lane_f64_(b.0, b.1, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t { + transmute(vld4q_s64(transmute(a))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f64) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -> float64x2x2_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fmax))] +pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8")] - fn vld2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8) -> float64x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v1f64" + )] + fn _vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; } - vld2q_lane_f64_(b.0, b.1, LANE as i64, a as _) + _vmax_f64(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fmax))] +pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64")] - fn vld3q_s64_(ptr: *const int64x2_t) -> int64x2x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v2f64" + )] + fn _vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; } - vld3q_s64_(a as _) + _vmaxq_f64(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64) +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t { - transmute(vld3q_s64(transmute(a))) +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v1f64" + )] + fn _vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + _vmaxnm_f64(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64) +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld3))] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t { - transmute(vld3q_s64(transmute(a))) +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v2f64" + )] + fn _vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + _vmaxnmq_f64(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f64) +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64")] - fn vld3_f64_(ptr: *const float64x1_t) -> float64x1x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32" + )] + fn _vmaxnmv_f32(a: float32x2_t) -> f32; } - vld3_f64_(a as _) + _vmaxnmv_f32(a) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64) +#[doc = "Floating-point maximum number across vector"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64")] - fn vld3q_f64_(ptr: *const float64x2_t) -> float64x2x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64" + )] + fn _vmaxnmvq_f64(a: float64x2_t) -> f64; } - vld3q_f64_(a as _) + _vmaxnmvq_f64(a) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s64) +#[doc = "Floating-point maximum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fmaxnmv))] +pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64")] - fn vld3q_dup_s64_(ptr: *const i64) -> int64x2x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32" + )] + fn _vmaxnmvq_f32(a: float32x4_t) -> f32; } - vld3q_dup_s64_(a as _) + _vmaxnmvq_f32(a) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t { - transmute(vld3q_dup_s64(transmute(a))) +#[cfg_attr(test, assert_instr(fmin))] +pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v1f64" + )] + fn _vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + _vmin_f64(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld3r))] 
+#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t { - transmute(vld3q_dup_s64(transmute(a))) +#[cfg_attr(test, assert_instr(fmin))] +pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v2f64" + )] + fn _vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + _vminq_f64(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f64) +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64")] - fn vld3_dup_f64_(ptr: *const f64) -> float64x1x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v1f64" + )] + fn _vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; } - vld3_dup_f64_(a as _) + _vminnm_f64(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f64) +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64")] - fn vld3q_dup_f64_(ptr: *const f64) -> float64x2x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v2f64" + )] + fn _vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; } - vld3q_dup_f64_(a as _) + _vminnmq_f64(a, b) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s8) +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s8(a: *const i8, b: int8x16x3_t) -> int8x16x3_t { - static_assert_uimm_bits!(LANE, 4); - #[allow(improper_ctypes)] +pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8")] - fn vld3q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *const i8) -> int8x16x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32" + )] + fn _vminnmv_f32(a: float32x2_t) -> f32; } - vld3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _) + _vminnmv_f32(a) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s64) +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s64(a: *const i64, b: int64x1x3_t) -> int64x1x3_t { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] +pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8")] - fn vld3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *const i8) -> int64x1x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64" + )] + fn _vminnmvq_f64(a: float64x2_t) -> f64; } - vld3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _) + _vminnmvq_f64(a) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s64) +#[doc = "Floating-point minimum number across vector"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fminnmv))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s64(a: *const i64, b: int64x2x3_t) -> int64x2x3_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8")] - fn vld3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *const i8) -> int64x2x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32" + )] + fn _vminnmvq_f32(a: float32x4_t) -> f32; } - vld3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _) + _vminnmvq_f32(a) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p64) +#[doc = "Floating-point 
multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_p64(a: *const p64, b: poly64x1x3_t) -> poly64x1x3_t { - static_assert!(LANE == 0); - transmute(vld3_lane_s64::(transmute(a), transmute(b))) +pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p64) +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_p64(a: *const p64, b: poly64x2x3_t) -> poly64x2x3_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld3q_lane_s64::(transmute(a), transmute(b))) +pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p8) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_p8(a: *const p8, b: poly8x16x3_t) -> poly8x16x3_t { - static_assert_uimm_bits!(LANE, 4); - transmute(vld3q_lane_s8::(transmute(a), transmute(b))) -} - -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u8) +pub unsafe fn vmlal_high_lane_s16( + a: int32x4_t, + b: int16x8_t, + c: int16x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_high_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub 
unsafe fn vld3q_lane_u8(a: *const u8, b: uint8x16x3_t) -> uint8x16x3_t { - static_assert_uimm_bits!(LANE, 4); - transmute(vld3q_lane_s8::(transmute(a), transmute(b))) -} - -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u64) +pub unsafe fn vmlal_high_laneq_s16( + a: int32x4_t, + b: int16x8_t, + c: int16x8_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlal_high_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_u64(a: *const u64, b: uint64x1x3_t) -> uint64x1x3_t { - static_assert!(LANE == 0); - transmute(vld3_lane_s64::(transmute(a), transmute(b))) +pub unsafe fn vmlal_high_lane_s32( + a: int64x2_t, + b: int32x4_t, + c: int32x2_t, +) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlal_high_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_u64(a: *const u64, b: uint64x2x3_t) -> uint64x2x3_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld3q_lane_s64::(transmute(a), transmute(b))) +pub unsafe fn vmlal_high_laneq_s32( + a: int64x2_t, + b: int32x4_t, + c: int32x4_t, +) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_high_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_f64(a: *const f64, b: float64x1x3_t) -> float64x1x3_t { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), 
link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8")] - fn vld3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *const i8) -> float64x1x3_t; - } - vld3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _) -} - -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64) +pub unsafe fn vmlal_high_lane_u16( + a: uint32x4_t, + b: uint16x8_t, + c: uint16x4_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_high_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_f64(a: *const f64, b: float64x2x3_t) -> float64x2x3_t { +pub unsafe fn vmlal_high_laneq_u16( + a: uint32x4_t, + b: uint16x8_t, + c: uint16x8_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlal_high_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vmlal_high_lane_u32( + a: uint64x2_t, + b: uint32x4_t, + c: uint32x2_t, +) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8")] - fn vld3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *const i8) -> float64x2x3_t; - } - vld3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _) + vmlal_high_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4))] +#[cfg_attr(test, assert_instr(umlal2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64")] - fn vld4q_s64_(ptr: *const int64x2_t) -> int64x2x4_t; - } - 
vld4q_s64_(a as _) +pub unsafe fn vmlal_high_laneq_u32( + a: uint64x2_t, + b: uint32x4_t, + c: uint32x4_t, +) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlal_high_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4))] +#[cfg_attr(test, assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t { - transmute(vld4q_s64(transmute(a))) +pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { + vmlal_high_s16(a, b, vdupq_n_s16(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t { - transmute(vld4q_s64(transmute(a))) +pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { + vmlal_high_s32(a, b, vdupq_n_s32(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64")] - fn vld4_f64_(ptr: *const float64x1_t) -> float64x1x4_t; - } - vld4_f64_(a as _) +pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { + vmlal_high_u16(a, b, vdupq_n_u16(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64) +#[doc = "Multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4))] +#[cfg_attr(test, assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - 
#[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64")] - fn vld4q_f64_(ptr: *const float64x2_t) -> float64x2x4_t; - } - vld4q_f64_(a as _) +pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { + vmlal_high_u32(a, b, vdupq_n_u32(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s64) +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] +#[cfg_attr(test, assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64")] - fn vld4q_dup_s64_(ptr: *const i64) -> int64x2x4_t; - } - vld4q_dup_s64_(a as _) +pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { + let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + vmlal_s8(a, b, c) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64) +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] +#[cfg_attr(test, assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t { - transmute(vld4q_dup_s64(transmute(a))) +pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); + vmlal_s16(a, b, c) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64) +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4r))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t { - transmute(vld4q_dup_s64(transmute(a))) +pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); + let c: int32x2_t = simd_shuffle!(c, c, [2, 3]); + vmlal_s32(a, b, c) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f64) 
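// NOTE (editorial sketch, not part of the upstream patch): the vmlal_high_*
// family above is a widening multiply-accumulate over the upper half of the
// inputs; each result lane is a[i] + widen(b_high[i]) * widen(c_high[i]).
// A minimal illustration, assuming an aarch64 target with these NEON
// intrinsics in scope; `mlal_high_demo` is a hypothetical helper name.
#[cfg(all(test, target_arch = "aarch64"))]
unsafe fn mlal_high_demo() {
    let acc = vdupq_n_s32(10);
    let b = vdupq_n_s16(3);
    let c = vdupq_n_s16(4);
    // Lanes 4..8 of b and c are widened to i32 and accumulated into acc:
    // every output lane becomes 10 + 3 * 4 == 22.
    let r = vmlal_high_s16(acc, b, c);
    assert_eq!(vgetq_lane_s32::<0>(r), 22);
}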
+#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] +#[cfg_attr(test, assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64")] - fn vld4_dup_f64_(ptr: *const f64) -> float64x1x4_t; - } - vld4_dup_f64_(a as _) +pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { + let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + vmlal_u8(a, b, c) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f64) +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] +#[cfg_attr(test, assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64")] - fn vld4q_dup_f64_(ptr: *const f64) -> float64x2x4_t; - } - vld4q_dup_f64_(a as _) +pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { + let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); + vmlal_u16(a, b, c) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s8) +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s8(a: *const i8, b: int8x16x4_t) -> int8x16x4_t { - static_assert_uimm_bits!(LANE, 4); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8")] - fn vld4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *const i8) -> int8x16x4_t; - } - vld4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { + let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); + let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]); + vmlal_u32(a, b, c) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s64) +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s64(a: *const i64, b: int64x1x4_t) -> int64x1x4_t { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8")] - fn vld4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *const i8) -> int64x1x4_t; - } - vld4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s64) +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s64(a: *const i64, b: int64x2x4_t) -> int64x2x4_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8")] - fn vld4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *const i8) -> int64x2x4_t; - } - vld4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p64) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_p64(a: *const p64, b: poly64x1x4_t) -> poly64x1x4_t { - static_assert!(LANE == 0); - transmute(vld4_lane_s64::(transmute(a), transmute(b))) -} - -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p64) +pub unsafe fn vmlsl_high_lane_s16( + a: int32x4_t, + b: int16x8_t, + c: int16x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + 
vmlsl_high_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_p64(a: *const p64, b: poly64x2x4_t) -> poly64x2x4_t { +pub unsafe fn vmlsl_high_laneq_s16( + a: int32x4_t, + b: int16x8_t, + c: int16x8_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlsl_high_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vmlsl_high_lane_s32( + a: int64x2_t, + b: int32x4_t, + c: int32x2_t, +) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - transmute(vld4q_lane_s64::(transmute(a), transmute(b))) + vmlsl_high_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p8) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_p8(a: *const p8, b: poly8x16x4_t) -> poly8x16x4_t { - static_assert_uimm_bits!(LANE, 4); - transmute(vld4q_lane_s8::(transmute(a), transmute(b))) +pub unsafe fn vmlsl_high_laneq_s32( + a: int64x2_t, + b: int32x4_t, + c: int32x4_t, +) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_high_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u8) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_u8(a: *const u8, b: uint8x16x4_t) -> uint8x16x4_t { - static_assert_uimm_bits!(LANE, 4); - transmute(vld4q_lane_s8::(transmute(a), transmute(b))) -} - -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u64) +pub unsafe fn vmlsl_high_lane_u16( + a: uint32x4_t, + b: uint16x8_t, + c: uint16x4_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_high_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_u64(a: *const u64, b: uint64x1x4_t) -> uint64x1x4_t { - static_assert!(LANE == 0); - transmute(vld4_lane_s64::(transmute(a), transmute(b))) -} - -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u64) +pub unsafe fn vmlsl_high_laneq_u16( + a: uint32x4_t, + b: uint16x8_t, + c: uint16x8_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmlsl_high_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_u64(a: *const u64, b: uint64x2x4_t) -> uint64x2x4_t { +pub unsafe fn vmlsl_high_lane_u32( + a: uint64x2_t, + b: uint32x4_t, + c: uint32x2_t, +) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - transmute(vld4q_lane_s64::(transmute(a), transmute(b))) + vmlsl_high_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f64) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_f64(a: *const f64, b: float64x1x4_t) -> float64x1x4_t { - 
static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8")] - fn vld4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *const i8) -> float64x1x4_t; - } - vld4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +pub unsafe fn vmlsl_high_laneq_u32( + a: uint64x2_t, + b: uint32x4_t, + c: uint32x4_t, +) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_high_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f64) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_f64(a: *const f64, b: float64x2x4_t) -> float64x2x4_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v2f64.p0i8")] - fn vld4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *const i8) -> float64x2x4_t; - } - vld4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { + vmlsl_high_s16(a, b, vdupq_n_s16(c)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_lane_f64(a: *mut f64, b: float64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { + vmlsl_high_s32(a, b, vdupq_n_s32(c)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +pub unsafe fn 
vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t { + vmlsl_high_u16(a, b, vdupq_n_u16(c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2) +#[doc = "Multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64")] - fn vst1_f64_x2_(a: float64x1_t, b: float64x1_t, ptr: *mut f64); - } - vst1_f64_x2_(b.0, b.1, a) +pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t { + vmlsl_high_u32(a, b, vdupq_n_u32(c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2) +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64")] - fn vst1q_f64_x2_(a: float64x2_t, b: float64x2_t, ptr: *mut f64); - } - vst1q_f64_x2_(b.0, b.1, a) +pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { + let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + vmlsl_s8(a, b, c) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3) +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64")] - fn vst1_f64_x3_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); - } - vst1_f64_x3_(b.0, b.1, b.2, a) +pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); + vmlsl_s16(a, b, c) } -/// Store multiple single-element 
structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3) +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(smlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64")] - fn vst1q_f64_x3_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64); - } - vst1q_f64_x3_(b.0, b.1, b.2, a) +pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { + let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); + let c: int32x2_t = simd_shuffle!(c, c, [2, 3]); + vmlsl_s32(a, b, c) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4) +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64")] - fn vst1_f64_x4_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut f64); - } - vst1_f64_x4_(b.0, b.1, b.2, b.3, a) +pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { + let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); + vmlsl_u8(a, b, c) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4) +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64")] - fn vst1q_f64_x4_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut f64); - } - vst1q_f64_x4_(b.0, b.1, b.2, b.3, a) +pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { + let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); + vmlsl_u16(a, 
b, c) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64) +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2))] +#[cfg_attr(test, assert_instr(umlsl2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v2i64.p0i8")] - fn vst2q_s64_(a: int64x2_t, b: int64x2_t, ptr: *mut i8); - } - vst2q_s64_(b.0, b.1, a as _) +pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { + let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); + let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]); + vmlsl_u32(a, b, c) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64) +#[doc = "Vector move"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { - vst2q_s64(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(sxtl2))] +pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t { + let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + vmovl_s8(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64) +#[doc = "Vector move"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2))] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) { - vst2q_s64(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(sxtl2))] +pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t { + let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + vmovl_s16(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64) +#[doc = "Vector move"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v1f64.p0i8")] - fn vst2_f64_(a: float64x1_t, b: float64x1_t, ptr: *mut i8); - } - 
vst2_f64_(b.0, b.1, a as _) +#[cfg_attr(test, assert_instr(sxtl2))] +pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t { + let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); + vmovl_s32(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64) +#[doc = "Vector move"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v2f64.p0i8")] - fn vst2q_f64_(a: float64x2_t, b: float64x2_t, ptr: *mut i8); - } - vst2q_f64_(b.0, b.1, a as _) +#[cfg_attr(test, assert_instr(uxtl2))] +pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t { + let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + vmovl_u8(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8) +#[doc = "Vector move"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8")] - fn vst2q_lane_s8_(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8); - } - vst2q_lane_s8_(b.0, b.1, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(uxtl2))] +pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t { + let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + vmovl_u16(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64) +#[doc = "Vector move"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s64(a: *mut i64, b: int64x1x2_t) { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8")] - fn vst2_lane_s64_(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8); - } - vst2_lane_s64_(b.0, b.1, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(uxtl2))] +pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t { + let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + vmovl_u32(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64) +#[doc = "Extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8")] - fn vst2q_lane_s64_(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8); - } - vst2q_lane_s64_(b.0, b.1, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(xtn2))] +pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + let c: int8x8_t = simd_cast(b); + simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8) +#[doc = "Extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - vst2q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(xtn2))] +pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + let c: int16x4_t = simd_cast(b); + simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64) +#[doc = "Extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { - static_assert!(LANE == 0); - vst2_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(xtn2))] +pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + let c: int32x2_t = simd_cast(b); + simd_shuffle!(a, c, [0, 1, 2, 3]) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64) +#[doc = "Extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t) { - 
static_assert_uimm_bits!(LANE, 1); - vst2q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(xtn2))] +pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + let c: uint8x8_t = simd_cast(b); + simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8) +#[doc = "Extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t) { - static_assert_uimm_bits!(LANE, 4); - vst2q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(xtn2))] +pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + let c: uint16x4_t = simd_cast(b); + simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64) +#[doc = "Extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_p64(a: *mut p64, b: poly64x1x2_t) { - static_assert!(LANE == 0); - vst2_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(xtn2))] +pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + let c: uint32x2_t = simd_cast(b); + simd_shuffle!(a, c, [0, 1, 2, 3]) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_p64(a: *mut p64, b: poly64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + simd_mul(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_f64(a: *mut f64, b: float64x1x2_t) { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8")] - fn vst2_lane_f64_(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8); - } - vst2_lane_f64_(b.0, b.1, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_mul(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8")] - fn vst2q_lane_f64_(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8); - } - vst2q_lane_f64_(b.0, b.1, LANE as i64, a as _) +pub unsafe fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + static_assert!(LANE == 0); + simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v2i64.p0i8")] - fn vst3q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8); - } - vst3q_s64_(b.0, b.1, b.2, a as _) +pub unsafe fn vmul_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { + static_assert_uimm_bits!(LANE, 1); + simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { - vst3q_s64(transmute(a), transmute(b)) -} - -/// Store multiple 3-element structures from 
three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3))] +#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) { - vst3q_s64(transmute(a), transmute(b)) +pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t { + simd_mul(a, vdup_n_f64(b)) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v1f64.p0i8")] - fn vst3_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8); - } - vst3_f64_(b.0, b.1, b.2, a as _) +pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t { + simd_mul(a, vdupq_n_f64(b)) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v2f64.p0i8")] - fn vst3q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8); - } - vst3q_f64_(b.0, b.1, b.2, a as _) +pub unsafe fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { + static_assert!(LANE == 0); + let b: f64 = simd_extract!(b, LANE as u32); + a * b } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(test, assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8")] - fn vst3q_lane_s8_(a: int8x16_t, b: 
int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8); - } - vst3q_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _) -} - -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64) +pub unsafe fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmull_high_s16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(test, assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t) { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8")] - fn vst3_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8); - } - vst3_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _) -} - -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64) +pub unsafe fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmull_high_s16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(test, assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t) { +pub unsafe fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8")] - fn vst3q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8); - } - vst3q_lane_s64_(b.0, b.1, b.2, LANE as i64, a as _) + vmull_high_s32( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(test, assert_instr(smull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature 
= "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - vst3q_lane_s8::(transmute(a), transmute(b)) +pub unsafe fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmull_high_s32( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(test, assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { - static_assert!(LANE == 0); - vst3_lane_s64::(transmute(a), transmute(b)) -} - -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64) +pub unsafe fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmull_high_u16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[cfg_attr(test, assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3q_lane_s64::(transmute(a), transmute(b)) -} - -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8) +pub unsafe fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 3); + vmull_high_u16( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t) { - static_assert_uimm_bits!(LANE, 4); - vst3q_lane_s8::(transmute(a), transmute(b)) -} - -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3, LANE = 
0))] +#[cfg_attr(test, assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_p64(a: *mut p64, b: poly64x1x3_t) { - static_assert!(LANE == 0); - vst3_lane_s64::(transmute(a), transmute(b)) +pub unsafe fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmull_high_u32( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(umull2, LANE = 1))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_p64(a: *mut p64, b: poly64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - vst3q_lane_s64::(transmute(a), transmute(b)) +pub unsafe fn vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmull_high_u32( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t) { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8")] - fn vst3_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8); - } - vst3_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _) +pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { + vmull_high_s16(a, vdupq_n_s16(b)) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(smull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8")] - 
fn vst3q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8); - } - vst3q_lane_f64_(b.0, b.1, b.2, LANE as i64, a as _) +pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { + vmull_high_s32(a, vdupq_n_s32(b)) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4))] +#[cfg_attr(test, assert_instr(umull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v2i64.p0i8")] - fn vst4q_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8); - } - vst4q_s64_(b.0, b.1, b.2, b.3, a as _) +pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t { + vmull_high_u16(a, vdupq_n_u16(b)) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64) +#[doc = "Multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4))] +#[cfg_attr(test, assert_instr(umull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) { - vst4q_s64(transmute(a), transmute(b)) +pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t { + vmull_high_u32(a, vdupq_n_u32(b)) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64) +#[doc = "Polynomial multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) { - vst4q_s64(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(pmull))] +pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 { + vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64) +#[doc = "Polynomial multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = 
"llvm.aarch64.neon.st4.v1f64.p0i8")] - fn vst4_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8); - } - vst4_f64_(b.0, b.1, b.2, b.3, a as _) +#[cfg_attr(test, assert_instr(pmull))] +pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t { + let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + vmull_p8(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64) +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v2f64.p0i8")] - fn vst4q_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8); - } - vst4q_f64_(b.0, b.1, b.2, b.3, a as _) +#[cfg_attr(test, assert_instr(smull2))] +pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + vmull_s8(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8) +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8")] - fn vst4q_lane_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, n: i64, ptr: *mut i8); - } - vst4q_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(smull2))] +pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + vmull_s16(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64) +#[doc = "Signed multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s64(a: *mut i64, b: int64x1x4_t) { - static_assert!(LANE == 0); - 
#[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8")] - fn vst4_lane_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, n: i64, ptr: *mut i8); - } - vst4_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(smull2))] +pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle!(b, b, [2, 3]); + vmull_s32(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64) +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8")] - fn vst4q_lane_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, n: i64, ptr: *mut i8); - } - vst4q_lane_s64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(test, assert_instr(umull2))] +pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + vmull_u8(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8) +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - vst4q_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(umull2))] +pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + vmull_u16(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64) +#[doc = "Unsigned multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { - static_assert!(LANE == 0); - vst4_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr(test, 
assert_instr(umull2))] +pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]); + vmull_u32(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64) +#[doc = "Polynomial multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[target_feature(enable = "neon,aes")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4q_lane_s64::(transmute(a), transmute(b)) +#[cfg_attr(test, assert_instr(pmull))] +pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmull64" + )] + fn _vmull_p64(a: p64, b: p64) -> int8x16_t; + } + transmute(_vmull_p64(a, b)) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t) { - static_assert_uimm_bits!(LANE, 4); - vst4q_lane_s8::(transmute(a), transmute(b)) +pub unsafe fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { + static_assert!(LANE == 0); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_p64(a: *mut p64, b: poly64x1x4_t) { - static_assert!(LANE == 0); - vst4_lane_s64::(transmute(a), transmute(b)) +pub unsafe fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"] +#[doc 
= "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_p64(a: *mut p64, b: poly64x2x4_t) { +pub unsafe fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32 { static_assert_uimm_bits!(LANE, 1); - vst4q_lane_s64::(transmute(a), transmute(b)) + let b: f32 = simd_extract!(b, LANE as u32); + a * b } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_f64(a: *mut f64, b: float64x1x4_t) { - static_assert!(LANE == 0); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8")] - fn vst4_lane_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, n: i64, ptr: *mut i8); - } - vst4_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +pub unsafe fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32 { + static_assert_uimm_bits!(LANE, 2); + let b: f32 = simd_extract!(b, LANE as u32); + a * b } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[cfg_attr(test, assert_instr(fmul, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t) { +pub unsafe fn vmuld_laneq_f64(a: f64, b: float64x2_t) -> f64 { static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8")] - fn vst4q_lane_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, n: i64, ptr: *mut i8); - } - vst4q_lane_f64_(b.0, b.1, b.2, b.3, LANE as i64, a as _) -} - -/// Dot product index form with unsigned and signed integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32) -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(test, assert_instr(usdot, LANE = 3))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub unsafe fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c: int32x4_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - 
vusdot_s32(a, b, transmute(c)) + let b: f64 = simd_extract!(b, LANE as u32); + a * b } -/// Dot product index form with unsigned and signed integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(test, assert_instr(usdot, LANE = 3))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub unsafe fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let c: int32x4_t = transmute(c); - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v2f32" + )] + fn _vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vmulx_f32(a, b) } -/// Dot product index form with signed and unsigned integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(test, assert_instr(sudot, LANE = 3))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub unsafe fn vsudot_laneq_s32(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c: uint32x4_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, transmute(c), b) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v4f32" + )] + fn _vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vmulxq_f32(a, b) } -/// Dot product index form with signed and unsigned integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(test, assert_instr(sudot, LANE = 3))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] -pub unsafe fn vsudotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let c: uint32x4_t = transmute(c); - let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as 
u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, transmute(c), b) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v1f64" + )] + fn _vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + _vmulx_f64(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - simd_mul(a, b) +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.v2f64" + )] + fn _vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + _vmulxq_f64(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_mul(a, b) +pub unsafe fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t { - simd_mul(a, vdup_n_f64(b)) +pub unsafe fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t { - simd_mul(a, vdupq_n_f64(b)) +pub unsafe fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmulxq_f32( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - static_assert!(LANE == 0); - simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) +pub unsafe fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmulxq_f32( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmul_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { +pub unsafe fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - simd_mul(a, transmute::(simd_extract!(b, LANE as u32))) + vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { +pub unsafe fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { static_assert!(LANE == 0); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) + vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64) +#[doc = "Floating-point multiply extended"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +pub unsafe fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) + vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmuls_lane_f32(a: f32, b: float32x2_t) -> f32 { - static_assert_uimm_bits!(LANE, 1); - let b: f32 = simd_extract!(b, LANE as u32); - a * b +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.f64" + )] + fn _vmulxd_f64(a: f64, b: f64) -> f64; + } + _vmulxd_f64(a, b) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmuls_laneq_f32(a: f32, b: float32x4_t) -> f32 { - static_assert_uimm_bits!(LANE, 2); - let b: f32 = simd_extract!(b, LANE as u32); - a * b +#[cfg_attr(test, assert_instr(fmulx))] +pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmulx.f32" + )] + fn _vmulxs_f32(a: f32, b: f32) -> f32; + } + _vmulxs_f32(a, b) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_lane_f64) +#[doc = "Floating-point multiply extended"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmul, LANE = 0))] +#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmuld_lane_f64(a: f64, b: float64x1_t) -> f64 { +pub unsafe fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64 { static_assert!(LANE == 0); - let b: f64 = simd_extract!(b, LANE as u32); - a 
-/// Floating-point multiply
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_lane_f64)
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
+pub unsafe fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    let b: f64 = simd_extract!(b, LANE as u32);
-    a * b
+    vmulxd_f64(a, simd_extract!(b, LANE as u32))
 }
-/// Floating-point multiply
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
+pub unsafe fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    let b: f64 = simd_extract!(b, LANE as u32);
-    a * b
+    vmulxd_f64(a, simd_extract!(b, LANE as u32))
 }
-/// Signed multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmull_s8(a, b)
+pub unsafe fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulxs_f32(a, simd_extract!(b, LANE as u32))
 }
-/// Signed multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    vmull_s16(a, b)
+pub unsafe fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vmulxs_f32(a, simd_extract!(b, LANE as u32))
 }
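// --- Illustrative example (editor's sketch, not part of the generated file). ---
// LANE is a const generic (exposed in legacy argument position 2 via
// #[rustc_legacy_const_generics]), so an out-of-range lane is rejected at
// compile time by static_assert_uimm_bits!:
#[cfg(target_arch = "aarch64")]
fn fmulx_lane_demo() {
    use core::arch::aarch64::{vld1_f32, vmulxs_lane_f32};
    let data = [3.0f32, 4.0];
    // SAFETY: NEON is mandatory on AArch64 and `data` is a valid 2-lane source.
    let v = unsafe { vld1_f32(data.as_ptr()) };
    let r = unsafe { vmulxs_lane_f32::<1>(2.0, v) }; // 2.0 * v[1]
    assert_eq!(r, 8.0);
    // vmulxs_lane_f32::<2>(2.0, v) would fail to compile: LANE must fit in 1 bit.
}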
-/// Signed multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    vmull_s32(a, b)
+pub unsafe fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
+    static_assert!(LANE == 0);
+    vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-/// Unsigned multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmull_u8(a, b)
+#[cfg_attr(test, assert_instr(fneg))]
+pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t {
+    simd_neg(a)
 }
-/// Unsigned multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    vmull_u16(a, b)
+#[cfg_attr(test, assert_instr(fneg))]
+pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t {
+    simd_neg(a)
 }
-/// Unsigned multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    vmull_u32(a, b)
+#[cfg_attr(test, assert_instr(neg))]
+pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t {
+    simd_neg(a)
 }
-/// Polynomial multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(pmull))]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.pmull64")]
-        fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
-    }
-    transmute(vmull_p64_(a, b))
+#[cfg_attr(test, assert_instr(neg))]
+pub unsafe fn 
vnegq_s64(a: int64x2_t) -> int64x2_t { + simd_neg(a) } -/// Polynomial multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8) +#[doc = "Negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(pmull))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t { - let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - vmull_p8(a, b) +#[cfg_attr(test, assert_instr(neg))] +pub unsafe fn vnegd_s64(a: i64) -> i64 { + a.wrapping_neg() } -/// Polynomial multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p64) +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(test, assert_instr(pmull))] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 { - vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 { + let a1: f64 = simd_extract!(a, 0); + let a2: f64 = simd_extract!(a, 1); + a1 + a2 } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16) +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t { - vmull_high_s16(a, vdupq_n_s16(b)) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 { + let a1: f32 = simd_extract!(a, 0); + let a2: f32 = simd_extract!(a, 1); + a1 + a2 } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32) +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t { - vmull_high_s32(a, vdupq_n_s32(b)) +#[cfg_attr(test, assert_instr(faddp))] +pub unsafe fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v4f32" + )] + fn _vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vpaddq_f32(a, b) } -/// Multiply long -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16) +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t { - vmull_high_u16(a, vdupq_n_u16(b)) +#[cfg_attr(test, assert_instr(faddp))] +pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v2f64" + )] + fn _vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + _vpaddq_f64(a, b) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32) +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2))] +#[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t { - vmull_high_u32(a, vdupq_n_u32(b)) +pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v2f32" + )] + fn _vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vpmaxnm_f32(a, b) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16) +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v4f32" + )] + fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vpmaxnmq_f32(a, b) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16) +#[doc = "Floating-point Maximum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE 
= 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmp.v2f64" + )] + fn _vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + _vpmaxnmq_f64(a, b) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32) +#[doc = "Floating-point maximum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vpmaxnmqd_f64(a: float64x2_t) -> f64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64" + )] + fn _vpmaxnmqd_f64(a: float64x2_t) -> f64; + } + _vpmaxnmqd_f64(a) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32) +#[doc = "Floating-point maximum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(smull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fmaxnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vpmaxnms_f32(a: float32x2_t) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32" + )] + fn _vpmaxnms_f32(a: float32x2_t) -> f32; + } + _vpmaxnms_f32(a) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16) +#[doc = "Floating-point maximum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); 
- vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64" + )] + fn _vpmaxqd_f64(a: float64x2_t) -> f64; + } + _vpmaxqd_f64(a) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16) +#[doc = "Floating-point maximum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(test, assert_instr(fmaxp))] +pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32" + )] + fn _vpmaxs_f32(a: float32x2_t) -> f32; + } + _vpmaxs_f32(a) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32) +#[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f32" + )] + fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vpminnm_f32(a, b) } -/// Multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32) +#[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(umull2, LANE = 1))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +pub unsafe fn 
vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v4f32" + )] + fn _vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vpminnmq_f32(a, b) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f32) +#[doc = "Floating-point Minimum Number Pairwise (vector)."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx))] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmulx.v2f32")] - fn vmulx_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmp.v2f64" + )] + fn _vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; } - vmulx_f32_(a, b) + _vpminnmq_f64(a, b) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f32) +#[doc = "Floating-point minimum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx))] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] +pub unsafe fn vpminnmqd_f64(a: float64x2_t) -> f64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmulx.v4f32")] - fn vmulxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64" + )] + fn _vpminnmqd_f64(a: float64x2_t) -> f64; } - vmulxq_f32_(a, b) + _vpminnmqd_f64(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f64) +#[doc = "Floating-point minimum number pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx))] +#[cfg_attr(test, assert_instr(fminnmp))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +pub unsafe fn vpminnms_f32(a: float32x2_t) -> f32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmulx.v1f64")] - fn vmulx_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + #[cfg_attr( 
+ any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32" + )] + fn _vpminnms_f32(a: float32x2_t) -> f32; } - vmulx_f64_(a, b) + _vpminnms_f32(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f64) +#[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmulx.v2f64")] - fn vmulxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f64.v2f64" + )] + fn _vpminqd_f64(a: float64x2_t) -> f64; } - vmulxq_f64_(a, b) + _vpminqd_f64(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64) +#[doc = "Floating-point minimum pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulx_lane_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - static_assert!(LANE == 0); - vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) +#[cfg_attr(test, assert_instr(fminp))] +pub unsafe fn vpmins_f32(a: float32x2_t) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminv.f32.v2f32" + )] + fn _vpmins_f32(a: float32x2_t) -> f32; + } + _vpmins_f32(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulx_laneq_f64(a: float64x1_t, b: float64x2_t) -> float64x1_t { - static_assert_uimm_bits!(LANE, 1); - vmulx_f64(a, transmute::(simd_extract!(b, LANE as u32))) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] +pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v1i64" + )] + fn _vqabs_s64(a: int64x1_t) -> int64x1_t; + } + _vqabs_s64(a) } -/// Floating-point multiply extended -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] +pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v2i64" + )] + fn _vqabsq_s64(a: int64x2_t) -> int64x2_t; + } + _vqabsq_s64(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32) +#[doc = "Signed saturating absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsb_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] +pub unsafe fn vqabsb_s8(a: i8) -> i8 { + simd_extract!(vqabs_s8(vdup_n_s8(a)), 0) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32) +#[doc = "Signed saturating absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] +pub unsafe fn vqabsh_s16(a: i16) -> i16 { + simd_extract!(vqabs_s16(vdup_n_s16(a)), 0) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32) +#[doc = "Signed saturating absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmulxq_f32(a, 
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] +pub unsafe fn vqabss_s32(a: i32) -> i32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.i32" + )] + fn _vqabss_s32(a: i32) -> i32; + } + _vqabss_s32(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64) +#[doc = "Signed saturating absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { - static_assert!(LANE == 0); - vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] +pub unsafe fn vqabsd_s64(a: i64) -> i64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.i64" + )] + fn _vqabsd_s64(a: i64) -> i64; + } + _vqabsd_s64(a) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 { + let a: int8x8_t = vdup_n_s8(a); + let b: int8x8_t = vdup_n_s8(b); + simd_extract!(vqadd_s8(a, b), 0) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_f32) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmulx.f32")] - fn vmulxs_f32_(a: f32, b: f32) -> f32; - } - vmulxs_f32_(a, b) -} - -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_f64) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - 
#[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmulx.f64")] - fn vmulxd_f64_(a: f64, b: f64) -> f64; - } - vmulxd_f64_(a, b) +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 { + let a: int16x4_t = vdup_n_s16(a); + let b: int16x4_t = vdup_n_s16(b); + simd_extract!(vqadd_s16(a, b), 0) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxs_lane_f32(a: f32, b: float32x2_t) -> f32 { - static_assert_uimm_bits!(LANE, 1); - vmulxs_f32(a, simd_extract!(b, LANE as u32)) +#[cfg_attr(test, assert_instr(uqadd))] +pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 { + let a: uint8x8_t = vdup_n_u8(a); + let b: uint8x8_t = vdup_n_u8(b); + simd_extract!(vqadd_u8(a, b), 0) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxs_laneq_f32(a: f32, b: float32x4_t) -> f32 { - static_assert_uimm_bits!(LANE, 2); - vmulxs_f32(a, simd_extract!(b, LANE as u32)) +#[cfg_attr(test, assert_instr(uqadd))] +pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 { + let a: uint16x4_t = vdup_n_u16(a); + let b: uint16x4_t = vdup_n_u16(b); + simd_extract!(vqadd_u16(a, b), 0) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_lane_f64) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxd_lane_f64(a: f64, b: float64x1_t) -> f64 { - static_assert!(LANE == 0); - vmulxd_f64(a, simd_extract!(b, LANE as u32)) +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.i32" + )] + fn _vqadds_s32(a: i32, b: i32) -> i32; + } + _vqadds_s32(a, b) } -/// Floating-point multiply extended -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmulxd_laneq_f64(a: f64, b: float64x2_t) -> f64 { - static_assert_uimm_bits!(LANE, 1); - vmulxd_f64(a, simd_extract!(b, LANE as u32)) +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.i64" + )] + fn _vqaddd_s64(a: i64, b: i64) -> i64; + } + _vqaddd_s64(a, b) } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(uqadd))] +pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.v1f64")] - fn vfma_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.i32" + )] + fn _vqadds_u32(a: i32, b: i32) -> i32; } - vfma_f64_(b, c, a) + _vqadds_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmla))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(uqadd))] +pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.v2f64")] - fn vfmaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.i64" + )] + fn _vqaddd_u64(a: i64, b: i64) -> i64; } - vfmaq_f64_(b, c, a) + _vqaddd_u64(a.as_signed(), b.as_signed()).as_unsigned() } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f64) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd))] +#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", 
since = "1.59.0")] -pub unsafe fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t { - vfma_f64(a, b, vdup_n_f64(c)) +pub unsafe fn vqdmlal_high_lane_s16( + a: int32x4_t, + b: int16x8_t, + c: int16x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqaddq_s32(a, vqdmull_high_lane_s16::(b, c)) } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmla))] +#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] +#[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t { - vfmaq_f64(a, b, vdupq_n_f64(c)) +pub unsafe fn vqdmlal_high_laneq_s16( + a: int32x4_t, + b: int16x8_t, + c: int16x8_t, +) -> int32x4_t { + static_assert_uimm_bits!(N, 3); + vqaddq_s32(a, vqdmull_high_laneq_s16::(b, c)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfma_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlal_high_lane_s32( + a: int64x2_t, + b: int32x4_t, + c: int32x2_t, +) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqaddq_s64(a, vqdmull_high_lane_s32::(b, c)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmla, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfma_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlal_high_laneq_s32( + a: int64x2_t, + b: int32x4_t, + c: int32x4_t, +) -> int64x2_t { + static_assert_uimm_bits!(N, 2); + vqaddq_s64(a, vqdmull_high_laneq_s32::(b, c)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32) +#[doc = "Signed saturating doubling multiply-add long"] 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmaq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { + vqaddq_s32(a, vqdmull_high_n_s16(b, c)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmaq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { + vqaddq_s32(a, vqdmull_high_s16(b, c)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfma_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t { - static_assert!(LANE == 0); - vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_high_n_s32(b, c)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(sqdmlal2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfma_laneq_f64(a: float64x1_t, b: float64x1_t, c: float64x2_t) -> float64x1_t { - static_assert_uimm_bits!(LANE, 1); - vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> 
int64x2_t {
+    vqaddq_s64(a, vqdmull_high_s32(b, c))
 }
-/// Floating-point fused multiply-add to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f64)
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmaq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x1_t) -> float64x2_t {
-    static_assert!(LANE == 0);
-    vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlal_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
 }
-/// Floating-point fused multiply-add to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmaq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlal_laneq_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
 }
-/// Floating-point fused multiply-add to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.f32")]
-        fn vfmas_lane_f32_(a: f32, b: f32, c: f32) -> f32;
-    }
-    static_assert_uimm_bits!(LANE, 1);
-    let c: f32 = simd_extract!(c, LANE as u32);
-    vfmas_lane_f32_(b, c, a)
+pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32))
 }
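// --- Illustrative reference model (editor's sketch, not part of the generated file). ---
// vqdmlalh_lane_s16::<LANE> computes a + sat_i32(2 * b * c[LANE]); plain
// saturating integer arithmetic models it, with the doubling step saturating too:
fn qdmlalh_lane_s16_model(a: i32, b: i16, c: [i16; 4], lane: usize) -> i32 {
    let doubled = ((b as i32) * (c[lane] as i32)).saturating_mul(2);
    a.saturating_add(doubled)
}
// e.g. qdmlalh_lane_s16_model(0, i16::MIN, [i16::MIN; 4], 0) == i32::MAX,
// because 2 * (-32768)^2 == 2^31 saturates to i32::MAX.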
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmas_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.f32")] - fn vfmas_laneq_f32_(a: f32, b: f32, c: f32) -> f32; - } - static_assert_uimm_bits!(LANE, 2); - let c: f32 = simd_extract!(c, LANE as u32); - vfmas_laneq_f32_(b, c, a) +pub unsafe fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { + static_assert_uimm_bits!(LANE, 3); + vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_lane_f64) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmad_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.f64")] - fn vfmad_lane_f64_(a: f64, b: f64, c: f64) -> f64; - } - static_assert!(LANE == 0); - let c: f64 = simd_extract!(c, LANE as u32); - vfmad_lane_f64_(b, c, a) +pub unsafe fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { + static_assert_uimm_bits!(LANE, 1); + vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point fused multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmadd, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmad_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.f64")] - fn vfmad_laneq_f64_(a: f64, b: f64, c: f64) -> f64; - } - static_assert_uimm_bits!(LANE, 1); - let c: f64 = simd_extract!(c, LANE as u32); - vfmad_laneq_f64_(b, c, a) +pub unsafe fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { + static_assert_uimm_bits!(LANE, 2); + vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point fused multiply-subtract from accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64) +#[doc = "Signed 
-/// Floating-point fused multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub))]
+#[cfg_attr(test, assert_instr(sqdmlal))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    let b: float64x1_t = simd_neg(b);
-    vfma_f64(a, b, c)
+pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
+    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+    vqadds_s32(a, simd_extract!(x, 0))
 }
-/// Floating-point fused multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls))]
+#[cfg_attr(test, assert_instr(sqdmlal))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    let b: float64x2_t = simd_neg(b);
-    vfmaq_f64(a, b, c)
+pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
+    let x: i64 = vqaddd_s64(a, vqdmulls_s32(b, c));
+    x as i64
 }
-/// Floating-point fused Multiply-subtract to accumulator(vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f64)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
-    vfms_f64(a, b, vdup_n_f64(c))
+pub unsafe fn vqdmlsl_high_lane_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
 }
-/// Floating-point fused Multiply-subtract to accumulator(vector)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
-    vfmsq_f64(a, b, vdupq_n_f64(c))
+pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
 }
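// --- Illustrative reference model (editor's sketch, not part of the generated file). ---
// The "_high_" variants (sqdmlsl2) read the upper half of each 128-bit input:
// lane i of the result is a[i] - sat(2 * b[i + 4] * c[N]). A scalar model:
fn qdmlsl_high_lane_s16_model(a: [i32; 4], b: [i16; 8], c: [i16; 4], n: usize) -> [i32; 4] {
    core::array::from_fn(|i| {
        let doubled = ((b[i + 4] as i32) * (c[n] as i32)).saturating_mul(2);
        a[i].saturating_sub(doubled)
    })
}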
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_high_n_s16(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_high_s16(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    static_assert!(LANE == 0);
-    vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_high_n_s32(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t, c: float64x2_t) -> float64x1_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_high_s32(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f64)
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t, c: float64x1_t) -> float64x2_t {
-    static_assert!(LANE == 0);
-    vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqsubq_s32(a, vqdmull_laneq_s16::<N>(b, c))
 }

-/// Floating-point fused multiply-subtract to accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)
= "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmls, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmsq_laneq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - static_assert_uimm_bits!(LANE, 1); - vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) +pub unsafe fn vqdmlsl_laneq_s32( + a: int64x2_t, + b: int32x2_t, + c: int32x4_t, +) -> int64x2_t { + static_assert_uimm_bits!(N, 2); + vqsubq_s64(a, vqdmull_laneq_s32::(b, c)) } -/// Floating-point fused multiply-subtract to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32) +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmss_lane_f32(a: f32, b: f32, c: float32x2_t) -> f32 { - vfmas_lane_f32::(a, -b, c) +pub unsafe fn vqdmlslh_lane_s16(a: i32, b: i16, c: int16x4_t) -> i32 { + static_assert_uimm_bits!(LANE, 2); + vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point fused multiply-subtract to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32) +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmss_laneq_f32(a: f32, b: f32, c: float32x4_t) -> f32 { - vfmas_laneq_f32::(a, -b, c) +pub unsafe fn vqdmlslh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { + static_assert_uimm_bits!(LANE, 3); + vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point fused multiply-subtract to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64) +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmsd_lane_f64(a: f64, b: f64, c: float64x1_t) -> f64 { - vfmad_lane_f64::(a, -b, c) +pub unsafe fn vqdmlsls_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { + 
static_assert_uimm_bits!(LANE, 1); + vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point fused multiply-subtract to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64) +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmsub, LANE = 0))] +#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))] #[rustc_legacy_const_generics(3)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vfmsd_laneq_f64(a: f64, b: f64, c: float64x2_t) -> f64 { - vfmad_laneq_f64::(a, -b, c) +pub unsafe fn vqdmlsls_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { + static_assert_uimm_bits!(LANE, 2); + vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Divide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f32) +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fdiv))] +#[cfg_attr(test, assert_instr(sqdmlsl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdiv_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_div(a, b) +pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 { + let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c)); + vqsubs_s32(a, simd_extract!(x, 0)) } -/// Divide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f32) +#[doc = "Signed saturating doubling multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fdiv))] +#[cfg_attr(test, assert_instr(sqdmlsl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdivq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_div(a, b) +pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 { + let x: i64 = vqsubd_s64(a, vqdmulls_s32(b, c)); + x as i64 } -/// Divide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f64) +#[doc = "Vector saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fdiv))] +#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - simd_div(a, b) +pub unsafe fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) } -/// Divide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f64) 
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fdiv))]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_div(a, b)
+pub unsafe fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
 }

-/// Subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsub))]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    simd_sub(a, b)
+pub unsafe fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
 }

-/// Subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsub))]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_sub(a, b)
+pub unsafe fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32)))
 }

-/// Subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
-    a.wrapping_sub(b)
+pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmulhh_s16(a, b)
 }

-/// Subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
-    a.wrapping_sub(b)
+pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(N, 3);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmulhh_s16(a, b)
 }

-/// Add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sqdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
-    a.wrapping_add(b)
+pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqdmulh_s16(a, b), 0)
 }

-/// Add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_u64)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sqdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
-    a.wrapping_add(b)
+pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
+    let a: int32x2_t = vdup_n_s32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    simd_extract!(vqdmulh_s32(a, b), 0)
 }

-/// Floating-point add across vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_f32)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(faddp))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_f32(a: float32x2_t) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.faddv.f32.v2f32")]
-        fn vaddv_f32_(a: float32x2_t) -> f32;
-    }
-    vaddv_f32_(a)
+pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(N, 1);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulhs_s32(a, b)
 }

-/// Floating-point add across vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_f32)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(faddp))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_f32(a: float32x4_t) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.faddv.f32.v4f32")]
-        fn vaddvq_f32_(a: float32x4_t) -> f32;
-    }
-    vaddvq_f32_(a)
+pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulhs_s32(a, b)
 }

-/// Floating-point add across vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_f64)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(faddp))]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_f64(a: float64x2_t) -> f64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.faddv.f64.v2f64")]
-        fn vaddvq_f64_(a: float64x2_t) -> f64;
-    }
-    vaddvq_f64_(a)
+pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }

-/// Signed Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_s16)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(saddlv))]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")]
-        fn vaddlv_s16_(a: int16x4_t) -> i32;
-    }
-    vaddlv_s16_(a)
+pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }

-/// Signed Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_s16)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(saddlv))]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")]
-        fn vaddlvq_s16_(a: int16x8_t) -> i32;
-    }
-    vaddlvq_s16_(a)
+pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }

-/// Signed Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_s32)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(saddlp))]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")]
-        fn vaddlv_s32_(a: int32x2_t) -> i64;
-    }
-    vaddlv_s32_(a)
+pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }

-/// Signed Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_s32)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(saddlv))]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.saddlv.i64.v4i32")]
-        fn vaddlvq_s32_(a: int32x4_t) -> i64;
-    }
-    vaddlvq_s32_(a)
+pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmull_s16(a, b)
 }

-/// Unsigned Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_u16)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uaddlv))]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uaddlv.i32.v4i16")]
-        fn vaddlv_u16_(a: uint16x4_t) -> u32;
-    }
-    vaddlv_u16_(a)
+pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmull_s32(a, b)
 }

-/// Unsigned Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_u16)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uaddlv))]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uaddlv.i32.v8i16")]
-        fn vaddlvq_u16_(a: uint16x8_t) -> u32;
-    }
-    vaddlvq_u16_(a)
+pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    vqdmull_s16(a, b)
 }

-/// Unsigned Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_u32)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uaddlp))]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uaddlv.i64.v2i32")]
-        fn vaddlv_u32_(a: uint32x2_t) -> u64;
-    }
-    vaddlv_u32_(a)
+pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    vqdmull_s32(a, b)
 }

-/// Unsigned Add Long across Vector
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_u32)
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uaddlv))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uaddlv.i64.v4i32")]
-        fn vaddlvq_u32_(a: uint32x4_t) -> u64;
-    }
-    vaddlvq_u32_(a)
+pub unsafe fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }

-/// Signed Subtract Wide
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ssubw))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }

-/// Signed Subtract Wide
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ssubw))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmullh_s16(a, b)
 }

-/// Signed Subtract Wide
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ssubw))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    let c: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulls_s32(a, b)
 }

-/// Unsigned Subtract Wide
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usubw))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
"1.59.0")] -pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - simd_sub(a, simd_cast(c)) +pub unsafe fn vqdmullh_laneq_s16(a: i16, b: int16x8_t) -> i32 { + static_assert_uimm_bits!(N, 3); + let b: i16 = simd_extract!(b, N as u32); + vqdmullh_s16(a, b) } -/// Unsigned Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16) +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usubw))] +#[cfg_attr(test, assert_instr(sqdmull))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - simd_sub(a, simd_cast(c)) +pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 { + let a: int16x4_t = vdup_n_s16(a); + let b: int16x4_t = vdup_n_s16(b); + simd_extract!(vqdmull_s16(a, b), 0) } -/// Unsigned Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32) +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usubw))] +#[cfg_attr(test, assert_instr(sqdmull, N = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - simd_sub(a, simd_cast(c)) +pub unsafe fn vqdmulls_lane_s32(a: i32, b: int32x2_t) -> i64 { + static_assert_uimm_bits!(N, 1); + let b: i32 = simd_extract!(b, N as u32); + vqdmulls_s32(a, b) } -/// Signed Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8) +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ssubl))] +#[cfg_attr(test, assert_instr(sqdmull))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { - let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: int16x8_t = simd_cast(c); - let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: int16x8_t = simd_cast(e); - simd_sub(d, f) +pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulls.scalar" + )] + fn _vqdmulls_s32(a: i32, b: i32) -> i64; + } + _vqdmulls_s32(a, b) } -/// Signed Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16) +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ssubl))] +#[cfg_attr(test, assert_instr(sqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { - let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let d: int32x4_t = simd_cast(c); - let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let f: int32x4_t = simd_cast(e); - simd_sub(d, f) +pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + simd_shuffle!( + a, + vqmovn_s16(b), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) } -/// Signed Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32) +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ssubl))] +#[cfg_attr(test, assert_instr(sqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { - let c: int32x2_t = simd_shuffle!(a, a, [2, 3]); - let d: int64x2_t = simd_cast(c); - let e: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let f: int64x2_t = simd_cast(e); - simd_sub(d, f) +pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Unsigned Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8) +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usubl))] +#[cfg_attr(test, assert_instr(sqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { - let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - let d: uint16x8_t = simd_cast(c); - let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: uint16x8_t = simd_cast(e); - simd_sub(d, f) +pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3]) } -/// Unsigned Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16) +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(usubl))] +#[cfg_attr(test, assert_instr(uqxtn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { - let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - let d: uint32x4_t = simd_cast(c); - let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let f: uint32x4_t = simd_cast(e); - simd_sub(d, f) 
+pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    simd_shuffle!(
+        a,
+        vqmovn_u16(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }

-/// Unsigned Subtract Long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usubl))]
+#[cfg_attr(test, assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let d: uint64x2_t = simd_cast(c);
-    let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let f: uint64x2_t = simd_cast(e);
-    simd_sub(d, f)
-}
-
-/// Bit clear and exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s8)
-#[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(bcax))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxs.v16i8")]
-        fn vbcaxq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
-    }
-    vbcaxq_s8_(a, b, c)
+pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Bit clear and exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s16)
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(bcax))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxs.v8i16")]
-        fn vbcaxq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    vbcaxq_s16_(a, b, c)
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3])
 }

-/// Bit clear and exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s32)
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(bcax))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxs.v4i32")]
-        fn vbcaxq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64"
+        )]
+        fn _vqmovnd_s64(a: i64) -> i32;
     }
-    vbcaxq_s32_(a, b, c)
+    _vqmovnd_s64(a)
 }

-/// Bit clear and exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s64)
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(bcax))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxs.v2i64")]
-        fn vbcaxq_s64_(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64"
+        )]
+        fn _vqmovnd_u64(a: i64) -> i32;
     }
-    vbcaxq_s64_(a, b, c)
+    _vqmovnd_u64(a.as_signed()).as_unsigned()
 }

-/// Bit clear and exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u8)
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(bcax))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vbcaxq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxu.v16i8")]
-        fn vbcaxq_u8_(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t;
-    }
-    vbcaxq_u8_(a, b, c)
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
+    simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0)
 }

-/// Bit clear and exclusive OR
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u16)
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(bcax))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vbcaxq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxu.v8i16")] - fn vbcaxq_u16_(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t; - } - vbcaxq_u16_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovns_s32(a: i32) -> i16 { + simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0) } -/// Bit clear and exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u32) +#[doc = "Saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(bcax))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub unsafe fn vbcaxq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxu.v4i32")] - fn vbcaxq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - vbcaxq_u32_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqxtn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovnh_u16(a: u16) -> u8 { + simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0) } -/// Bit clear and exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u64) +#[doc = "Saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(bcax))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.bcaxu.v2i64")] - fn vbcaxq_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - vbcaxq_u64_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqxtn))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovns_u32(a: u32) -> u16 { + simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0) } -/// Floating-point complex add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcadd))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32")] - fn vcadd_rot270_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - vcadd_rot270_f32_(a, b) 
+#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtun2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { + simd_shuffle!( + a, + vqmovun_s16(b), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) } -/// Floating-point complex add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcadd))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32")] - fn vcaddq_rot270_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - vcaddq_rot270_f32_(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtun2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { + simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Floating-point complex add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcadd))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64")] - fn vcaddq_rot270_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - vcaddq_rot270_f64_(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtun2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { + simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3]) } -/// Floating-point complex add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcadd))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32")] - fn vcadd_rot90_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - vcadd_rot90_f32_(a, b) +#[target_feature(enable 
= "neon")] +#[cfg_attr(test, assert_instr(sqxtun))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovunh_s16(a: i16) -> u8 { + simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0) } -/// Floating-point complex add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcadd))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32")] - fn vcaddq_rot90_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - vcaddq_rot90_f32_(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtun))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovuns_s32(a: i32) -> u16 { + simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0) } -/// Floating-point complex add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcadd))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64")] - fn vcaddq_rot90_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - vcaddq_rot90_f64_(a, b) +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtun))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqmovund_s64(a: i64) -> u32 { + simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot0.v2f32")] - fn vcmla_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v1i64" + )] + fn _vqneg_s64(a: int64x1_t) -> int64x1_t; } - vcmla_f32_(a, b, c) + _vqneg_s64(a) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot0.v4f32")] - fn vcmlaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v2i64" + )] + fn _vqnegq_s64(a: int64x2_t) -> int64x2_t; } - vcmlaq_f32_(a, b, c) + _vqnegq_s64(a) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_f64) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegb_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot0.v2f64")] - fn vcmlaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - } - vcmlaq_f64_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegb_s8(a: i8) -> i8 { + simd_extract!(vqneg_s8(vdup_n_s8(a)), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot90_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot90.v2f32")] - fn vcmla_rot90_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - } - vcmla_rot90_f32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, 
assert_instr(sqneg))] +pub unsafe fn vqnegh_s16(a: i16) -> i16 { + simd_extract!(vqneg_s16(vdup_n_s16(a)), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot90_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot90.v4f32")] - fn vcmlaq_rot90_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - } - vcmlaq_rot90_f32_(a, b, c) -} - -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_f64) -#[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot90.v2f64")] - fn vcmlaq_rot90_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - } - vcmlaq_rot90_f64_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegs_s32(a: i32) -> i32 { + simd_extract!(vqneg_s32(vdup_n_s32(a)), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot180_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot180.v2f32")] - fn vcmla_rot180_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - } - vcmla_rot180_f32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegd_s64(a: i64) -> i64 { + simd_extract!(vqneg_s64(vdup_n_s64(a)), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
#[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot180_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot180.v4f32")] - fn vcmlaq_rot180_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - } - vcmlaq_rot180_f32_(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlah_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlah_s16(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_f64) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot180.v2f64")] - fn vcmlaq_rot180_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - } - vcmlaq_rot180_f64_(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlah_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vqrdmlah_s32(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot270_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot270.v2f32")] - fn vcmla_rot270_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - } - vcmla_rot270_f32_(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn 
vqrdmlah_laneq_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x8_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlah_s16(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot270_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot270.v4f32")] - fn vcmlaq_rot270_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - } - vcmlaq_rot270_f32_(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlah_laneq_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x4_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vqrdmlah_s32(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_f64) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla))] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcmla.rot270.v2f64")] - fn vcmlaq_rot270_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t; - } - vcmlaq_rot270_f64_(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahq_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x4_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + let c: int16x8_t = simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmlahq_s16(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " 
* Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - static_assert!(LANE == 0); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahq_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x2_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlahq_s32(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahq_laneq_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + let c: int16x8_t = simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmlahq_s16(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { - static_assert!(LANE == 0); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahq_laneq_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlahq_s32(a, b, c) } -/// Floating-point 
complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_f32(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlah.v4i16" + )] + fn _vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + _vqrdmlah_s16(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot90_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - static_assert!(LANE == 0); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot90_f32(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlah.v8i16" + )] + fn _vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + _vqrdmlahq_s16(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot90_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE 
as u32 + 1]); - vcmla_rot90_f32(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlah.v2i32" + )] + fn _vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + _vqrdmlah_s32(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot90_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { - static_assert!(LANE == 0); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_rot90_f32(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlah.v4i32" + )] + fn _vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + _vqrdmlahq_s32(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot90_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_rot90_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { + static_assert_uimm_bits!(LANE, 2); + vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot180_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - static_assert!(LANE == 0); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot180_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { + static_assert_uimm_bits!(LANE, 3); + vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot180_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { static_assert_uimm_bits!(LANE, 1); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot180_f32(a, b, c) + vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot180_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { - static_assert!(LANE == 0); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_rot180_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { + static_assert_uimm_bits!(LANE, 2); + vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot180_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_rot180_f32(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 { + let a: int16x4_t = vdup_n_s16(a); + let b: int16x4_t = vdup_n_s16(b); + let c: int16x4_t = vdup_n_s16(c); + simd_extract!(vqrdmlah_s16(a, b, c), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32) +#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot270_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - static_assert!(LANE == 0); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot270_f32(a, b, c) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlah))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 { + let a: int32x2_t = vdup_n_s32(a); + let b: int32x2_t = vdup_n_s32(b); + let c: int32x2_t = vdup_n_s32(c); + simd_extract!(vqrdmlah_s32(a, b, c), 0) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmla_rot270_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmla_rot270_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlsh_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlsh_s16(a, b, c) } -/// Floating-point complex multiply 
accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot270_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { - static_assert!(LANE == 0); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_rot270_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlsh_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vqrdmlsh_s32(a, b, c) } -/// Floating-point complex multiply accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,fcma")] -#[cfg_attr(test, assert_instr(fcmla, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_fcma", issue = "117222")] -pub unsafe fn vcmlaq_rot270_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: float32x4_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]); - vcmlaq_rot270_f32(a, b, c) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlsh_laneq_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x8_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlsh_s16(a, b, c) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(test, assert_instr(sdot, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub unsafe fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t { +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlsh_laneq_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x4_t, +) -> int32x2_t { 
static_assert_uimm_bits!(LANE, 2); - let c: int32x4_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) + vqrdmlsh_s32(a, b, c) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(test, assert_instr(sdot, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub unsafe fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshq_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x4_t, +) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - let c: int32x4_t = transmute(c); + let c: int16x8_t = simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmlshq_s16(a, b, c) +} + +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshq_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x2_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) + vqrdmlshq_s32(a, b, c) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(test, assert_instr(udot, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub unsafe fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c: uint32x4_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshq_laneq_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + let c: int16x8_t = simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmlshq_s16(a, 
b, c) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(test, assert_instr(udot, LANE = 0))] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] #[rustc_legacy_const_generics(3)] -#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] -pub unsafe fn vdotq_laneq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshq_laneq_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, +) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - let c: uint32x4_t = transmute(c); - let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmlshq_s32(a, b, c) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmax))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmax.v1f64")] - fn vmax_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlsh.v4i16" + )] + fn _vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; } - vmax_f64_(a, b) + _vqrdmlsh_s16(a, b, c) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f64) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmax))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), 
link_name = "llvm.aarch64.neon.fmax.v2f64")] - fn vmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlsh.v8i16" + )] + fn _vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; } - vmaxq_f64_(a, b) + _vqrdmlshq_s16(a, b, c) } -/// Floating-point Maximum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f64) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnm))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnm.v1f64")] - fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlsh.v2i32" + )] + fn _vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; } - vmaxnm_f64_(a, b) + _vqrdmlsh_s32(a, b, c) } -/// Floating-point Maximum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f64) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnm))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnm.v2f64")] - fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmlsh.v4i32" + )] + fn _vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; } - vmaxnmq_f64_(a, b) + _vqrdmlshq_s32(a, b, c) } -/// Floating-point maximum number across vector -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, 
assert_instr(fmaxnmp))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32")] - fn vmaxnmv_f32_(a: float32x2_t) -> f32; - } - vmaxnmv_f32_(a) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { + static_assert_uimm_bits!(LANE, 2); + vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point maximum number across vector -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f64) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64")] - fn vmaxnmvq_f64_(a: float64x2_t) -> f64; - } - vmaxnmvq_f64_(a) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { + static_assert_uimm_bits!(LANE, 3); + vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point maximum number across vector -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmv))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32")] - fn vmaxnmvq_f32_(a: float32x4_t) -> f32; - } - vmaxnmvq_f32_(a) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { + static_assert_uimm_bits!(LANE, 1); + vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point Maximum Number Pairwise (vector). 
-/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")] - fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - vpmaxnm_f32_(a, b) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { + static_assert_uimm_bits!(LANE, 2); + vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) } -/// Floating-point Maximum Number Pairwise (vector). -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")] - fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - vpmaxnmq_f64_(a, b) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 { + let a: int16x4_t = vdup_n_s16(a); + let b: int16x4_t = vdup_n_s16(b); + let c: int16x4_t = vdup_n_s16(c); + simd_extract!(vqrdmlsh_s16(a, b, c), 0) } -/// Floating-point Maximum Number Pairwise (vector). 
-/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32) +#[doc = "Signed saturating rounding doubling multiply subtract returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")] - fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - vpmaxnmq_f32_(a, b) +#[target_feature(enable = "rdm")] +#[cfg_attr(test, assert_instr(sqrdmlsh))] +#[stable(feature = "rdm_intrinsics", since = "1.62.0")] +pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 { + let a: int32x2_t = vdup_n_s32(a); + let b: int32x2_t = vdup_n_s32(b); + let c: int32x2_t = vdup_n_s32(c); + simd_extract!(vqrdmlsh_s32(a, b, c), 0) } -/// Floating-point maximum number pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] +#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxnms_f32(a: float32x2_t) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32")] - fn vpmaxnms_f32_(a: float32x2_t) -> f32; - } - vpmaxnms_f32_(a) +pub unsafe fn vqrdmulhh_lane_s16(a: i16, b: int16x4_t) -> i16 { + static_assert_uimm_bits!(LANE, 2); + vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) } -/// Floating-point maximum number pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnmp))] +#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxnmqd_f64(a: float64x2_t) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64")] - fn vpmaxnmqd_f64_(a: float64x2_t) -> f64; - } - vpmaxnmqd_f64_(a) +pub unsafe fn vqrdmulhh_laneq_s16(a: i16, b: int16x8_t) -> i16 { + static_assert_uimm_bits!(LANE, 3); + vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) } -/// Floating-point maximum pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxs_f32) 
+#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxp))] +#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32")] - fn vpmaxs_f32_(a: float32x2_t) -> f32; - } - vpmaxs_f32_(a) +pub unsafe fn vqrdmulhs_lane_s32(a: i32, b: int32x2_t) -> i32 { + static_assert_uimm_bits!(LANE, 1); + vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) } -/// Floating-point maximum pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxp))] +#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64")] - fn vpmaxqd_f64_(a: float64x2_t) -> f64; - } - vpmaxqd_f64_(a) +pub unsafe fn vqrdmulhs_laneq_s32(a: i32, b: int32x4_t) -> i32 { + static_assert_uimm_bits!(LANE, 2); + vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f64) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmin))] +#[cfg_attr(test, assert_instr(sqrdmulh))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmin.v1f64")] - fn vmin_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; - } - vmin_f64_(a, b) +pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 { + simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f64) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmin))] +#[cfg_attr(test, assert_instr(sqrdmulh))] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmin.v2f64")] - fn vminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - vminq_f64_(a, b) +pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 { + simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0) } -/// Floating-point Minimum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f64) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnm))] +#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnm.v1f64")] - fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; - } - vminnm_f64_(a, b) +pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 { + let a: int8x8_t = vdup_n_s8(a); + let b: int8x8_t = vdup_n_s8(b); + simd_extract!(vqrshl_s8(a, b), 0) } -/// Floating-point Minimum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f64) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnm))] +#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnm.v2f64")] - fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - vminnmq_f64_(a, b) +pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 { + let a: int16x4_t = vdup_n_s16(a); + let b: int16x4_t = vdup_n_s16(b); + simd_extract!(vqrshl_s16(a, b), 0) } -/// Floating-point minimum number across vector -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmp))] +#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32")] - fn vminnmv_f32_(a: float32x2_t) -> f32; - } - vminnmv_f32_(a) +pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 { + let a: uint8x8_t = vdup_n_u8(a); + let b: 
int8x8_t = vdup_n_s8(b); + simd_extract!(vqrshl_u8(a, b), 0) } -/// Floating-point minimum number across vector -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f64) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmp))] +#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64")] - fn vminnmvq_f64_(a: float64x2_t) -> f64; - } - vminnmvq_f64_(a) +pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { + let a: uint16x4_t = vdup_n_u16(a); + let b: int16x4_t = vdup_n_s16(b); + simd_extract!(vqrshl_u16(a, b), 0) } -/// Floating-point minimum number across vector -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f32) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmv))] +#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 { - #[allow(improper_ctypes)] +pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32")] - fn vminnmvq_f32_(a: float32x4_t) -> f32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.i64" + )] + fn _vqrshld_s64(a: i64, b: i64) -> i64; } - vminnmvq_f32_(a) + _vqrshld_s64(a, b) } -/// Vector move -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sxtl2))] +#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t { - let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vmovl_s8(a) +pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.i32" + )] + fn _vqrshls_s32(a: i32, b: i32) -> i32; + } + _vqrshls_s32(a, b) } -/// Vector move -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] -#[cfg_attr(test, assert_instr(sxtl2))] +#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t { - let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vmovl_s16(a) +pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.i32" + )] + fn _vqrshls_u32(a: i32, b: i32) -> i32; + } + _vqrshls_u32(a.as_signed(), b).as_unsigned() } -/// Vector move -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sxtl2))] +#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t { - let a: int32x2_t = simd_shuffle!(a, a, [2, 3]); - vmovl_s32(a) +pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.i64" + )] + fn _vqrshld_u64(a: i64, b: i64) -> i64; + } + _vqrshld_u64(a.as_signed(), b).as_unsigned() } -/// Vector move -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8) +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uxtl2))] +#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t { - let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vmovl_u8(a) +pub unsafe fn vqrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_shuffle!( + a, + vqrshrn_n_s16::(b), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) } -/// Vector move -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16) +#[doc = "Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uxtl2))] +#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t { - let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vmovl_u16(a) +pub unsafe fn vqrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_shuffle!(a, vqrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Vector move -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32) +#[doc = 
"Signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uxtl2))] +#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t { - let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]); - vmovl_u32(a) +pub unsafe fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_shuffle!(a, vqrshrn_n_s64::(b), [0, 1, 2, 3]) } -/// Floating-point add pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32) +#[doc = "Unsigned saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(faddp))] +#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.faddp.v4f32")] - fn vpaddq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - vpaddq_f32_(a, b) +pub unsafe fn vqrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_shuffle!( + a, + vqrshrn_n_u16::(b), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) } -/// Floating-point add pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64) +#[doc = "Unsigned saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(faddp))] +#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.faddp.v2f64")] - fn vpaddq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; - } - vpaddq_f64_(a, b) +pub unsafe fn vqrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_shuffle!(a, vqrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Floating-point add pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32) +#[doc = "Unsigned saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(uqrshrn2, 
N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 { - let a1: f32 = simd_extract!(a, 0); - let a2: f32 = simd_extract!(a, 1); - a1 + a2 +pub unsafe fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_shuffle!(a, vqrshrn_n_u64::(b), [0, 1, 2, 3]) } -/// Floating-point add pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_f64) +#[doc = "Unsigned saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 { - let a1: f64 = simd_extract!(a, 0); - let a2: f64 = simd_extract!(a, 1); - a1 + a2 +pub unsafe fn vqrshrnd_n_u64(a: u64) -> u32 { + static_assert!(N >= 1 && N <= 32); + let a: uint64x2_t = vdupq_n_u64(a); + simd_extract!(vqrshrn_n_u64::(a), 0) } -/// Floating-point Minimum Number Pairwise (vector). -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32) +#[doc = "Unsigned saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnmp))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmp.v2f32")] - fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - vpminnm_f32_(a, b) +pub unsafe fn vqrshrnh_n_u16(a: u16) -> u8 { + static_assert!(N >= 1 && N <= 8); + let a: uint16x8_t = vdupq_n_u16(a); + simd_extract!(vqrshrn_n_u16::(a), 0) } -/// Floating-point Minimum Number Pairwise (vector). 
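One detail worth noting for the `vqrshl*` family: the shift count is signed, so a negative count turns the "rounding shift left" into a rounding shift right, and out-of-range results saturate rather than wrap. A small illustrative sketch (assumed AArch64 target, hypothetical `demo` function):

```
// Hypothetical usage sketch, not part of the generated file.
#[cfg(target_arch = "aarch64")]
fn demo() {
    use core::arch::aarch64::*;
    unsafe {
        // Negative count: rounding shift right, (5 + 1) >> 1 == 3.
        assert_eq!(vqrshld_s64(5, -1), 3);
        // 64 << 1 == 128 overflows i8, so the result saturates to 127.
        assert_eq!(vqrshlb_s8(64, 1), i8::MAX);
    }
}
```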
-/// Floating-point Minimum Number Pairwise (vector).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
-        fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    vpminnmq_f64_(a, b)
+pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    let a: uint32x4_t = vdupq_n_u32(a);
+    simd_extract!(vqrshrn_n_u32::<N>(a), 0)
 }
-/// Floating-point Minimum Number Pairwise (vector).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
-        fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    vpminnmq_f32_(a, b)
+pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+    static_assert!(N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract!(vqrshrn_n_s16::<N>(a), 0)
 }
-/// Floating-point minimum number pairwise
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnms_f32(a: float32x2_t) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32")]
-        fn vpminnms_f32_(a: float32x2_t) -> f32;
-    }
-    vpminnms_f32_(a)
+pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
+    static_assert!(N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract!(vqrshrn_n_s32::<N>(a), 0)
 }
-/// Floating-point minimum number pairwise
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnmqd_f64(a: float64x2_t) -> f64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64")]
-        fn vpminnmqd_f64_(a: float64x2_t) -> f64;
-    }
-    vpminnmqd_f64_(a)
+pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+    static_assert!(N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract!(vqrshrn_n_s64::<N>(a), 0)
 }
-/// Floating-point minimum pairwise
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminp))]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmins_f32(a: float32x2_t) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminv.f32.v2f32")]
-        fn vpmins_f32_(a: float32x2_t) -> f32;
-    }
-    vpmins_f32_(a)
+pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vqrshrun_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-/// Floating-point minimum pairwise
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminp))]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminv.f64.v2f64")]
-        fn vpminqd_f64_(a: float64x2_t) -> f64;
-    }
-    vpminqd_f64_(a)
+pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull))]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqdmull_s16(a, b), 0)
+pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmulls.scalar")]
-        fn vqdmulls_s32_(a: i32, b: i32) -> i64;
-    }
-    vqdmulls_s32_(a, b)
+pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract!(vqrshrun_n_s64::<N>(a), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    vqdmull_s16(a, b)
+pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract!(vqrshrun_n_s16::<N>(a), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    vqdmull_s32(a, b)
+pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract!(vqrshrun_n_s32::<N>(a), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmull_s16(a, b)
+pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmull_s32(a, b)
+pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert_uimm_bits!(N, 6);
+    simd_extract!(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
 }
-/// Vector saturating doubling long multiply by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
 }
-/// Vector saturating doubling long multiply by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
+    static_assert_uimm_bits!(N, 5);
+    simd_extract!(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmullh_s16(a, b)
+pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
-    static_assert_uimm_bits!(N, 3);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmullh_s16(a, b)
+pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert_uimm_bits!(N, 6);
+    simd_extract!(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
-    static_assert_uimm_bits!(N, 1);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulls_s32(a, b)
+pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulls_s32(a, b)
+pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
+    static_assert_uimm_bits!(N, 5);
+    simd_extract!(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 {
+    let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
+    simd_extract!(c, 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 {
+    let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
+    simd_extract!(c, 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
+    let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
+    simd_extract!(c, 0)
 }
-/// Signed saturating doubling multiply long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
+    let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
+    simd_extract!(c, 0)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_high_s16(b, c))
+pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
+    let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
+    simd_extract!(c, 0)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_high_s32(b, c))
+pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
+    let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
+    simd_extract!(c, 0)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_high_n_s16(b, c))
+pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.i64"
+        )]
+        fn _vqshld_s64(a: i64, b: i64) -> i64;
+    }
+    _vqshld_s64(a, b)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_high_n_s32(b, c))
+pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.i64"
+        )]
+        fn _vqshld_u64(a: i64, b: i64) -> i64;
+    }
+    _vqshld_u64(a.as_signed(), b).as_unsigned()
 }
-/// Vector widening saturating doubling multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlub_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+    simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0)
 }
-/// Vector widening saturating doubling multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
+    static_assert_uimm_bits!(N, 6);
+    simd_extract!(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqaddq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
+    static_assert_uimm_bits!(N, 5);
+    simd_extract!(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0)
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqaddq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vqshrn_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal))]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
-    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    vqadds_s32(a, simd_extract!(x, 0))
+pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal))]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
-    let x: i64 = vqaddd_s64(a, vqdmulls_s32(b, c));
-    x as i64
+pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vqshrn_n_u16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
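The `_high_` narrowing forms combine two steps: `b` is shifted right and narrowed, and the result lands in the upper lanes while `a` supplies the already-narrowed lower lanes. Illustrative sketch (assumed AArch64 target, hypothetical `demo` function):

```
// Hypothetical usage sketch, not part of the generated file.
#[cfg(target_arch = "aarch64")]
fn demo() {
    use core::arch::aarch64::*;
    unsafe {
        let lo: int8x8_t = vdup_n_s8(1);        // already-narrowed half
        let wide: int16x8_t = vdupq_n_s16(512); // 512 >> 4 == 32 per lane
        let r: int8x16_t = vqshrn_high_n_s16::<4>(lo, wide);
        assert_eq!(vgetq_lane_s8::<0>(r), 1);   // lower half keeps `a`
        assert_eq!(vgetq_lane_s8::<15>(r), 32); // upper half is narrowed `b`
    }
}
```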
+#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlalh_laneq_s16(a: i32, b: i16, c: int16x8_t) -> i32 { - static_assert_uimm_bits!(LANE, 3); - vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) +pub unsafe fn vqshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_shuffle!(a, vqshrn_n_u64::(b), [0, 1, 2, 3]) } -/// Signed saturating doubling multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlals_lane_s32(a: i64, b: i32, c: int32x2_t) -> i64 { - static_assert_uimm_bits!(LANE, 1); - vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) +pub unsafe fn vqshrnd_n_s64(a: i64) -> i32 { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.i32" + )] + fn _vqshrnd_n_s64(a: i64, n: i32) -> i32; + } + _vqshrnd_n_s64(a, N) } -/// Signed saturating doubling multiply-add long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlals_laneq_s32(a: i64, b: i32, c: int32x4_t) -> i64 { - static_assert_uimm_bits!(LANE, 2); - vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) +pub unsafe fn vqshrnd_n_u64(a: u64) -> u32 { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.i32" + )] + fn _vqshrnd_n_u64(a: i64, n: i32) -> i32; + } + _vqshrnd_n_u64(a.as_signed(), N).as_unsigned() } -/// Signed saturating doubling multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - vqsubq_s32(a, vqdmull_high_s16(b, c)) +pub unsafe fn vqshrnh_n_s16(a: i16) -> i8 { + static_assert!(N >= 1 && N <= 8); + 
simd_extract!(vqshrn_n_s16::(vdupq_n_s16(a)), 0) } -/// Signed saturating doubling multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - vqsubq_s64(a, vqdmull_high_s32(b, c)) +pub unsafe fn vqshrns_n_s32(a: i32) -> i16 { + static_assert!(N >= 1 && N <= 16); + simd_extract!(vqshrn_n_s32::(vdupq_n_s32(a)), 0) } -/// Signed saturating doubling multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t { - vqsubq_s32(a, vqdmull_high_n_s16(b, c)) +pub unsafe fn vqshrnh_n_u16(a: u16) -> u8 { + static_assert!(N >= 1 && N <= 8); + simd_extract!(vqshrn_n_u16::(vdupq_n_u16(a)), 0) } -/// Signed saturating doubling multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl2))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t { - vqsubq_s64(a, vqdmull_high_n_s32(b, c)) +pub unsafe fn vqshrns_n_u32(a: u32) -> u16 { + static_assert!(N >= 1 && N <= 16); + simd_extract!(vqshrn_n_u32::(vdupq_n_u32(a)), 0) } -/// Vector widening saturating doubling multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))] -#[rustc_legacy_const_generics(3)] +#[cfg_attr(test, assert_instr(sqshrun2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(N, 3); 
-/// Vector widening saturating doubling multiply subtract with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqsubq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vqshrun_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
}
-/// Vector widening saturating doubling multiply subtract with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    simd_extract!(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N >= 1 && N <= 8);
+    simd_extract!(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    simd_extract!(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
-    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    vqsubs_s32(a, simd_extract!(x, 0))
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 {
+    let a: int8x8_t = vdup_n_s8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract!(vqsub_s8(a, b), 0)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
-    let x: i64 = vqsubd_s64(a, vqdmulls_s32(b, c));
-    x as i64
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqsub_s16(a, b), 0)
}
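A hedged sketch of the scalar saturating subtract just defined: instead of wrapping on overflow, the result clamps to the type's range (`sqsub_demo` is illustrative, not part of this crate):

#[cfg(target_arch = "aarch64")]
unsafe fn sqsub_demo() {
    use core::arch::aarch64::{vqsubb_s8, vqsubh_s16};
    assert_eq!(vqsubb_s8(10, 3), 7);                 // in range: ordinary subtract
    assert_eq!(vqsubb_s8(i8::MIN, 1), i8::MIN);      // would underflow: clamps to i8::MIN
    assert_eq!(vqsubh_s16(i16::MIN, 100), i16::MIN); // same clamping for 16-bit
}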
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32))
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 {
+    let a: uint8x8_t = vdup_n_u8(a);
+    let b: uint8x8_t = vdup_n_u8(b);
+    simd_extract!(vqsub_u8(a, b), 0)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32))
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 {
+    let a: uint16x4_t = vdup_n_u16(a);
+    let b: uint16x4_t = vdup_n_u16(b);
+    simd_extract!(vqsub_u16(a, b), 0)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32))
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.i32"
+        )]
+        fn _vqsubs_s32(a: i32, b: i32) -> i32;
+    }
+    _vqsubs_s32(a, b)
}
-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32))
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.i64"
+        )]
+        fn _vqsubd_s64(a: i64, b: i64) -> i64;
+    }
+    _vqsubd_s64(a, b)
}
-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqdmulh_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.i32"
+        )]
+        fn _vqsubs_u32(a: i32, b: i32) -> i32;
+    }
+    _vqsubs_u32(a.as_signed(), b.as_signed()).as_unsigned()
}
-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
-    let a: int32x2_t = vdup_n_s32(a);
-    let b: int32x2_t = vdup_n_s32(b);
-    simd_extract!(vqdmulh_s32(a, b), 0)
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.i64"
+        )]
+        fn _vqsubd_u64(a: i64, b: i64) -> i64;
+    }
+    _vqsubd_u64(a.as_signed(), b.as_signed()).as_unsigned()
}
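The unsigned counterpart floors at zero rather than wrapping. A small sketch under the same assumptions as above (`uqsub_demo` is hypothetical):

#[cfg(target_arch = "aarch64")]
unsafe fn uqsub_demo() {
    use core::arch::aarch64::{vqsubd_u64, vqsubs_u32};
    assert_eq!(vqsubs_u32(3, 5), 0); // 3 - 5 floors at 0 instead of wrapping
    assert_eq!(vqsubd_u64(5, 3), 2); // in range: ordinary subtract
}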
-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)
+#[doc = "Rotate and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmulhh_s16(a, b)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(rax1))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.rax1"
+        )]
+        fn _vrax1q_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vrax1q_u64(a.as_signed(), b.as_signed()).as_unsigned()
}
-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(N, 3);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmulhh_s16(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rbit.v8i8"
+        )]
+        fn _vrbit_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vrbit_s8(a)
}
-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(N, 1);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulhs_s32(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rbit.v16i8"
+        )]
+        fn _vrbitq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vrbitq_s8(a)
}
-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulhs_s32(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
+    transmute(vrbit_s8(transmute(a)))
}
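A sketch of the bit-reversal semantics: RBIT mirrors the eight bits within each byte lane independently (`rbit_demo` is illustrative only):

#[cfg(target_arch = "aarch64")]
unsafe fn rbit_demo() {
    use core::arch::aarch64::{vdup_n_u8, vget_lane_u8, vrbit_u8};
    let v = vdup_n_u8(0b0000_0001);
    let r = vrbit_u8(v);
    // The lowest bit of each byte becomes the highest bit.
    assert_eq!(vget_lane_u8::<0>(r), 0b1000_0000);
}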
"1.59.0")] -pub unsafe fn vqdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) +#[cfg_attr(test, assert_instr(rbit))] +pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t { + transmute(vrbitq_s8(transmute(a))) } -/// Vector saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16) +#[doc = "Reverse bit order"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) +#[cfg_attr(test, assert_instr(rbit))] +pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t { + transmute(vrbit_s8(transmute(a))) } -/// Vector saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32) +#[doc = "Reverse bit order"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) +#[cfg_attr(test, assert_instr(rbit))] +pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t { + transmute(vrbitq_s8(transmute(a))) } -/// Vector saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32) +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(frecpe))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) +pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v1f64" + )] + fn _vrecpe_f64(a: float64x1_t) -> float64x1_t; + } + _vrecpe_f64(a) } -/// Saturating extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_s16) +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, 
-#[cfg_attr(test, assert_instr(sqxtn))]
+#[cfg_attr(test, assert_instr(frecpe))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
-    simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0)
+pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v2f64"
+        )]
+        fn _vrecpeq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrecpeq_f64(a)
}
-/// Saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecped_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
+#[cfg_attr(test, assert_instr(frecpe))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovns_s32(a: i32) -> i16 {
-    simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0)
+pub unsafe fn vrecped_f64(a: f64) -> f64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.f64"
+        )]
+        fn _vrecped_f64(a: f64) -> f64;
+    }
+    _vrecped_f64(a)
}
-/// Saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpes_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
+#[cfg_attr(test, assert_instr(frecpe))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
-    simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0)
+pub unsafe fn vrecpes_f32(a: f32) -> f32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.f32"
+        )]
+        fn _vrecpes_f32(a: f32) -> f32;
+    }
+    _vrecpes_f32(a)
}
-/// Saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
+#[cfg_attr(test, assert_instr(frecps))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovns_u32(a: u32) -> u16 {
-    simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0)
+pub unsafe fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v1f64"
+        )]
+        fn _vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vrecps_f64(a, b)
}
-/// Saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
+#[cfg_attr(test, assert_instr(frecps))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
-        fn vqmovnd_s64_(a: i64) -> i32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v2f64"
+        )]
+        fn _vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
    }
-    vqmovnd_s64_(a)
+    _vrecpsq_f64(a, b)
}
-/// Saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_u64)
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
+#[cfg_attr(test, assert_instr(frecps))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vrecpsd_f64(a: f64, b: f64) -> f64 {
    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
-        fn vqmovnd_u64_(a: u64) -> u32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.f64"
+        )]
+        fn _vrecpsd_f64(a: f64, b: f64) -> f64;
    }
-    vqmovnd_u64_(a)
+    _vrecpsd_f64(a, b)
}
-/// Signed saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpss_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(test, assert_instr(frecps))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    simd_shuffle!(a, vqmovn_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.f32"
+        )]
+        fn _vrecpss_f32(a: f32, b: f32) -> f32;
+    }
+    _vrecpss_f32(a, b)
}
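The estimate/step pair is designed for Newton-Raphson refinement: FRECPS computes `2.0 - a*x`, so multiplying the running estimate by it roughly doubles the number of accurate bits per step. A minimal sketch, not library guidance (`recip_demo` is hypothetical):

#[cfg(target_arch = "aarch64")]
unsafe fn recip_demo(a: f32) -> f32 {
    use core::arch::aarch64::{vrecpes_f32, vrecpss_f32};
    let mut x = vrecpes_f32(a); // rough (~8-bit) initial estimate of 1/a
    x *= vrecpss_f32(a, x);     // first refinement step: x * (2 - a*x)
    x *= vrecpss_f32(a, x);     // second step, close to full f32 precision
    x
}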
-/// Signed saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)
+#[doc = "Floating-point reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(test, assert_instr(frecpx))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vrecpxd_f64(a: f64) -> f64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpx.f64"
+        )]
+        fn _vrecpxd_f64(a: f64) -> f64;
+    }
+    _vrecpxd_f64(a)
}
-/// Signed saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)
+#[doc = "Floating-point reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxs_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(test, assert_instr(frecpx))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3])
+pub unsafe fn vrecpxs_f32(a: f32) -> f32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpx.f32"
+        )]
+        fn _vrecpxs_f32(a: f32) -> f32;
+    }
+    _vrecpxs_f32(a)
}
-/// Signed saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    simd_shuffle!(a, vqmovn_u16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
+    transmute(a)
}
-/// Signed saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+    transmute(a)
}
-/// Signed saturating extract narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+    transmute(a)
}
-/// Signed saturating extract unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
-    simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+    transmute(a)
}
-/// Signed saturating extract unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
-    simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+    transmute(a)
}
-/// Signed saturating extract unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovund_s64(a: i64) -> u32 {
-    simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+    transmute(a)
}
-/// Signed saturating extract unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    simd_shuffle!(a, vqmovun_s16(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+    transmute(a)
}
-/// Signed saturating extract unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+    transmute(a)
}
-/// Signed saturating extract unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
-    simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
-    simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmulhh_s16(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+    transmute(a)
}
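A reinterpret cast is a pure bit-pattern change, which is why the test asserts only a `nop`. An illustrative sketch (not part of the generated file; assumes a little-endian aarch64 target, where the high byte of the f64 lands in lane 7):

#[cfg(target_arch = "aarch64")]
unsafe fn reinterpret_demo() {
    use core::arch::aarch64::{vdup_n_f64, vget_lane_u8, vreinterpret_u8_f64};
    // 1.0f64 is 0x3FF0_0000_0000_0000; the cast exposes those bytes directly.
    let bits = vreinterpret_u8_f64(vdup_n_f64(1.0));
    assert_eq!(vget_lane_u8::<7>(bits), 0x3f);
}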
-/// Signed saturating rounding doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqrdmulhh_s16(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqrdmulhs_s32(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmulhs_s32(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlah.v4i16")]
-        fn vqrdmlah_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
-    }
-    vqrdmlah_s16_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlah.v8i16")]
-        fn vqrdmlahq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    vqrdmlahq_s16_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlah.v2i32")]
-        fn vqrdmlah_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
-    }
-    vqrdmlah_s32_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlah.v4i32")]
-        fn vqrdmlahq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
-    }
-    vqrdmlahq_s32_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    let c: int16x4_t = vdup_n_s16(c);
-    simd_extract!(vqrdmlah_s16(a, b, c), 0)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
-    let a: int32x2_t = vdup_n_s32(a);
-    let b: int32x2_t = vdup_n_s32(b);
-    let c: int32x2_t = vdup_n_s32(c);
-    simd_extract!(vqrdmlah_s32(a, b, c), 0)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlah_s16(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlah_s16(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlahq_s16(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlahq_s16(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vqrdmlah_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { + transmute(a) } -/// Signed saturating rounding doubling multiply accumulate returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vqrdmlah_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply accumulate returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlahq_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { + transmute(a) } -/// Signed saturating rounding doubling multiply accumulate returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlahq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let c: int32x4_t = 
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlahq_s32(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32))
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32))
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32))
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+    transmute(a)
}
-/// Signed saturating rounding doubling multiply accumulate returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)
cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlahs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { - static_assert_uimm_bits!(LANE, 2); - vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh))] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlsh.v4i16")] - fn vqrdmlsh_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; - } - vqrdmlsh_s16_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh))] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlsh.v8i16")] - fn vqrdmlshq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; - } - vqrdmlshq_s16_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, 
assert_instr(sqrdmlsh))] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlsh.v2i32")] - fn vqrdmlsh_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; - } - vqrdmlsh_s32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh))] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmlsh.v4i32")] - fn vqrdmlshq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; - } - vqrdmlshq_s32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh))] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 { - let a: int16x4_t = vdup_n_s16(a); - let b: int16x4_t = vdup_n_s16(b); - let c: int16x4_t = vdup_n_s16(c); - simd_extract!(vqrdmlsh_s16(a, b, c), 0) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh))] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - let c: int32x2_t = vdup_n_s32(c); - 
simd_extract!(vqrdmlsh_s32(a, b, c), 0) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlsh_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlsh_s16(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlsh_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlsh_s16(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlshq_s16(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { + transmute(a) 
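A point worth making explicit for the block of reinterpret casts above: they all lower to `transmute` and are test-asserted to emit no instruction (`assert_instr(nop)`), so they only relabel the bits already sitting in a vector register. A minimal sketch of what that guarantees a caller, assuming an aarch64 target and using only stable intrinsics (the helper name `f64_bits` is illustrative, not part of this patch):

```rust
use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn f64_bits(x: f64) -> u64 {
    let v: float64x2_t = vdupq_n_f64(x); // broadcast x into both lanes
    let bits: uint64x2_t = vreinterpretq_u64_f64(v); // bit-for-bit relabel, no instruction
    vgetq_lane_u64::<0>(bits) // read lane 0 back as an integer
}

fn main() {
    // NEON is always available on aarch64, so the call is sound there.
    assert_eq!(unsafe { f64_bits(1.0) }, 0x3ff0_0000_0000_0000); // IEEE-754 encoding of 1.0
}
```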
} -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlshq_s16(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlsh_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vqrdmlsh_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlsh_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vqrdmlsh_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlshq_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmlshq_s32(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshh_lane_s16(a: i16, b: i16, c: int16x4_t) -> i16 { - static_assert_uimm_bits!(LANE, 2); - vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn 
vqrdmlshh_laneq_s16(a: i16, b: i16, c: int16x8_t) -> i16 { - static_assert_uimm_bits!(LANE, 3); - vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32) -#[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshs_lane_s32(a: i32, b: i32, c: int32x2_t) -> i32 { - static_assert_uimm_bits!(LANE, 1); - vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) -} - -/// Signed saturating rounding doubling multiply subtract returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32) -#[inline] -#[target_feature(enable = "rdm")] -#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[stable(feature = "rdm_intrinsics", since = "1.62.0")] -pub unsafe fn vqrdmlshs_laneq_s32(a: i32, b: i32, c: int32x4_t) -> i32 { - static_assert_uimm_bits!(LANE, 2); - vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) -} - -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.i32")] - fn vqrshls_s32_(a: i32, b: i32) -> i32; - } - vqrshls_s32_(a, b) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.i64")] - fn vqrshld_s64_(a: i64, b: i64) -> i64; - } - vqrshld_s64_(a, b) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8) +#[doc = "Vector reinterpret cast 
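Since the scalar `sqrdmlah`/`sqrdmlsh` lane wrappers above are being rewritten wholesale, a plain-Rust reference model of the arithmetic helps review. This is a sketch assuming the Arm pseudocode shape for SQRDMLAH (widen, double the product, add the accumulator in the wide domain, round, take the high half, saturate); the helper names are illustrative, not from this patch:

```rust
// Reference model for SQRDMLAH on one i16 lane.
fn sqrdmlah_s16_model(a: i16, b: i16, c: i16) -> i16 {
    let wide = ((a as i64) << 16)     // accumulator lifted into the wide domain
        + 2 * (b as i64) * (c as i64) // doubling multiply
        + (1 << 15);                  // rounding constant
    (wide >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16 // high half, saturated
}

// SQRDMLSH is identical except the doubled product is subtracted.
fn sqrdmlsh_s16_model(a: i16, b: i16, c: i16) -> i16 {
    let wide = ((a as i64) << 16) - 2 * (b as i64) * (c as i64) + (1 << 15);
    (wide >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

fn main() {
    // 0.5 * 0.5 in Q15 with a zero accumulator: 16384 -> 8192 (0.25 in Q15).
    assert_eq!(sqrdmlah_s16_model(0, 16384, 16384), 8192);
    assert_eq!(sqrdmlsh_s16_model(0, 16384, 16384), -8192);
    // Saturation: the result pins at i16::MAX instead of wrapping.
    assert_eq!(sqrdmlah_s16_model(i16::MAX, i16::MAX, i16::MAX), i16::MAX);
}
```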
operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 { - let a: int8x8_t = vdup_n_s8(a); - let b: int8x8_t = vdup_n_s8(b); - simd_extract!(vqrshl_s8(a, b), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 { - let a: int16x4_t = vdup_n_s16(a); - let b: int16x4_t = vdup_n_s16(b); - simd_extract!(vqrshl_s16(a, b), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.i32")] - fn vqrshls_u32_(a: u32, b: i32) -> u32; - } - vqrshls_u32_(a, b) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.i64")] - fn vqrshld_u64_(a: u64, b: i64) -> u64; - } - vqrshld_u64_(a, b) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 { - let a: uint8x8_t = vdup_n_u8(a); - let b: int8x8_t = vdup_n_s8(b); - simd_extract!(vqrshl_u8(a, b), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { - let a: uint16x4_t = vdup_n_u16(a); - let b: int16x4_t = vdup_n_s16(b); - simd_extract!(vqrshl_u16(a, b), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrnh_n_s16(a: i16) -> i8 { - static_assert!(N >= 1 && N <= 8); - let a: int16x8_t = vdupq_n_s16(a); - simd_extract!(vqrshrn_n_s16::(a), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrns_n_s32(a: i32) -> i16 { - static_assert!(N >= 1 && N <= 16); - let a: int32x4_t = vdupq_n_s32(a); - simd_extract!(vqrshrn_n_s32::(a), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
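The removed `vqrshrnh_n_s16`/`vqrshrns_n_s32`/`vqrshrnd_n_s64` wrappers broadcast the scalar, narrow with the vector `vqrshrn_n_*`, and extract lane 0; the arithmetic they delegate to is a rounding right shift followed by a saturating narrow. A reference model for the 32-to-16-bit case, as a sketch assuming the usual Arm rounding rule of adding `1 << (N - 1)` before the shift (helper name illustrative):

```rust
// Reference model for SQRSHRN on one lane: round, shift right, saturate narrow.
fn sqrshrn_n_s32_model(a: i32, n: u32) -> i16 {
    debug_assert!((1..=16).contains(&n)); // mirrors the static_assert! bound
    let rounded = ((a as i64) + (1i64 << (n - 1))) >> n;
    rounded.clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

fn main() {
    assert_eq!(sqrshrn_n_s32_model(0x1_0000, 16), 1); // 65536 >> 16 = 1
    assert_eq!(sqrshrn_n_s32_model(3, 1), 2);         // 3/2 rounds up to 2
    assert_eq!(sqrshrn_n_s32_model(i32::MAX, 1), i16::MAX); // saturates
}
```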
#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrnd_n_s64(a: i64) -> i32 { - static_assert!(N >= 1 && N <= 32); - let a: int64x2_t = vdupq_n_s64(a); - simd_extract!(vqrshrn_n_s64::(a), 0) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, vqrshrn_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vqrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64) +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vqrshrn_n_s64::(b), [0, 1, 2, 3]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub unsafe fn vrnd32x_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v2f32" + )] + fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t; + } + _vrnd32x_f32(a) } -/// Unsigned saturating rounded shift 
right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16) +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrnh_n_u16(a: u16) -> u8 { - static_assert!(N >= 1 && N <= 8); - let a: uint16x8_t = vdupq_n_u16(a); - simd_extract!(vqrshrn_n_u16::(a), 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v4f32" + )] + fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t; + } + _vrnd32xq_f32(a) } -/// Unsigned saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32) +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrns_n_u32(a: u32) -> u16 { - static_assert!(N >= 1 && N <= 16); - let a: uint32x4_t = vdupq_n_u32(a); - simd_extract!(vqrshrn_n_u32::(a), 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32x.v2f64" + )] + fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t; + } + _vrnd32xq_f64(a) } -/// Unsigned saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64) +#[doc = "Floating-point round to 32-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrnd_n_u64(a: u64) -> u32 { - static_assert!(N >= 1 && N <= 32); - let a: uint64x2_t = vdupq_n_u64(a); - simd_extract!(vqrshrn_n_u64::(a), 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] +pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.frint32x.f64" + )] + fn _vrnd32x_f64(a: f64) -> f64; + } + transmute(_vrnd32x_f64(simd_extract!(a, 0))) } -/// Unsigned saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16) +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, vqrshrn_n_u16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub unsafe fn vrnd32z_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v2f32" + )] + fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t; + } + _vrnd32z_f32(a) } -/// Unsigned saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32) +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vqrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v4f32" + )] + fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t; + } + _vrnd32zq_f32(a) } -/// Unsigned saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64) +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vqrshrn_n_u64::(b), [0, 1, 2, 3]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, 
not(target_env = "msvc")), assert_instr(frint32z))] +pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint32z.v2f64" + )] + fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t; + } + _vrnd32zq_f64(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16) +#[doc = "Floating-point round to 32-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrunh_n_s16(a: i16) -> u8 { - static_assert!(N >= 1 && N <= 8); - let a: int16x8_t = vdupq_n_s16(a); - simd_extract!(vqrshrun_n_s16::(a), 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] +pub unsafe fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint32z.f64" + )] + fn _vrnd32z_f64(a: f64) -> f64; + } + transmute(_vrnd32z_f64(simd_extract!(a, 0))) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32) +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshruns_n_s32(a: i32) -> u16 { - static_assert!(N >= 1 && N <= 16); - let a: int32x4_t = vdupq_n_s32(a); - simd_extract!(vqrshrun_n_s32::(a), 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub unsafe fn vrnd64x_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v2f32" + )] + fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t; + } + _vrnd64x_f32(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64) +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrund_n_s64(a: i64) -> u32 { - static_assert!(N >= 1 && N <= 32); - let a: int64x2_t = 
vdupq_n_s64(a); - simd_extract!(vqrshrun_n_s64::(a), 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v4f32" + )] + fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t; + } + _vrnd64xq_f32(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16) +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, vqrshrun_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64x.v2f64" + )] + fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t; + } + _vrnd64xq_f64(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32) +#[doc = "Floating-point round to 64-bit integer, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vqrshrun_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] +pub unsafe fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint64x.f64" + )] + fn _vrnd64x_f64(a: f64) -> f64; + } + transmute(_vrnd64x_f64(simd_extract!(a, 0))) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64) +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable 
= "neon")] -#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vqrshrun_n_s64::(b), [0, 1, 2, 3]) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub unsafe fn vrnd64z_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v2f32" + )] + fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t; + } + _vrnd64z_f32(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64) +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.i64")] - fn vqshld_s64_(a: i64, b: i64) -> i64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v4f32" + )] + fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t; } - vqshld_s64_(a, b) + _vrnd64zq_f32(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8) +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 { - let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b)); - simd_extract!(c, 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frint64z.v2f64" + )] + fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t; + } + _vrnd64zq_f64(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16) +#[doc = "Floating-point round to 64-bit integer toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
-#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 { - let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b)); - simd_extract!(c, 0) +#[target_feature(enable = "neon,frintts")] +#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] +pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.frint64z.f64" + )] + fn _vrnd64z_f64(a: f64) -> f64; + } + transmute(_vrnd64z_f64(simd_extract!(a, 0))) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32) +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 { - let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b)); - simd_extract!(c, 0) +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v2f32" + )] + fn _vrnd_f32(a: float32x2_t) -> float32x2_t; + } + _vrnd_f32(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_u64) +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.i64")] - fn vqshld_u64_(a: u64, b: i64) -> u64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v4f32" + )] + fn _vrndq_f32(a: float32x4_t) -> float32x4_t; } - vqshld_u64_(a, b) + _vrndq_f32(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8) +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 { - let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b)); - simd_extract!(c, 0) +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v1f64" + )] + fn _vrnd_f64(a: float64x1_t) -> float64x1_t; + } + _vrnd_f64(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16) +#[doc = "Floating-point round to integral, toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 { - let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b)); - simd_extract!(c, 0) +#[cfg_attr(test, assert_instr(frintz))] +pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.trunc.v2f64" + )] + fn _vrndq_f64(a: float64x2_t) -> float64x2_t; + } + _vrndq_f64(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32) +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 { - let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b)); - simd_extract!(c, 0) +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v2f32" + )] + fn _vrnda_f32(a: float32x2_t) -> float32x2_t; + } + _vrnda_f32(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8) +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlb_n_s8(a: i8) -> i8 { - static_assert_uimm_bits!(N, 3); - simd_extract!(vqshl_n_s8::(vdup_n_s8(a)), 0) +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v4f32" + )] + fn _vrndaq_f32(a: float32x4_t) -> float32x4_t; + } + _vrndaq_f32(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16) +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
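The `vrnd*` family rewritten through here covers one `frint` flavor per rounding mode, so a side-by-side on a value with a .5 fraction shows how they differ. A sketch assuming an aarch64 target; the four intrinsics used are the stable ones bound in this file:

```rust
use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn compare_rounding_modes() {
    let v = vdup_n_f32(2.5);
    assert_eq!(vget_lane_f32::<0>(vrnd_f32(v)), 2.0);  // frintz: toward zero
    assert_eq!(vget_lane_f32::<0>(vrnda_f32(v)), 3.0); // frinta: ties away from zero
    assert_eq!(vget_lane_f32::<0>(vrndm_f32(v)), 2.0); // frintm: toward minus infinity
    assert_eq!(vget_lane_f32::<0>(vrndn_f32(v)), 2.0); // frintn: ties to even
}

fn main() {
    unsafe { compare_rounding_modes() } // sound on aarch64, where NEON is mandatory
}
```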
-#[cfg_attr(test, assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlh_n_s16(a: i16) -> i16 { - static_assert_uimm_bits!(N, 4); - simd_extract!(vqshl_n_s16::(vdup_n_s16(a)), 0) +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v1f64" + )] + fn _vrnda_f64(a: float64x1_t) -> float64x1_t; + } + _vrnda_f64(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32) +#[doc = "Floating-point round to integral, to nearest with ties to away"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshls_n_s32(a: i32) -> i32 { - static_assert_uimm_bits!(N, 5); - simd_extract!(vqshl_n_s32::(vdup_n_s32(a)), 0) +#[cfg_attr(test, assert_instr(frinta))] +pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.round.v2f64" + )] + fn _vrndaq_f64(a: float64x2_t) -> float64x2_t; + } + _vrndaq_f64(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64) +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshld_n_s64(a: i64) -> i64 { - static_assert_uimm_bits!(N, 6); - simd_extract!(vqshl_n_s64::(vdup_n_s64(a)), 0) +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v2f32" + )] + fn _vrndi_f32(a: float32x2_t) -> float32x2_t; + } + _vrndi_f32(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8) +#[doc = "Floating-point round to integral, using current rounding mode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlb_n_u8(a: u8) -> u8 { - static_assert_uimm_bits!(N, 3); - simd_extract!(vqshl_n_u8::(vdup_n_u8(a)), 0) +#[cfg_attr(test, assert_instr(frinti))] +pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.nearbyint.v4f32" + )] + fn 
+    }
+    _vrndiq_f32(a)
 }
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v1f64"
+        )]
+        fn _vrndi_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrndi_f64(a)
 }
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
-    static_assert_uimm_bits!(N, 5);
-    simd_extract!(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.nearbyint.v2f64"
+        )]
+        fn _vrndiq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndiq_f64(a)
 }
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert_uimm_bits!(N, 6);
-    simd_extract!(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.floor.v2f32"
+        )]
+        fn _vrndm_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrndm_f32(a)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlub_n_s8)
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0)
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.floor.v4f32"
+        )]
+        fn _vrndmq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrndmq_f32(a)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.floor.v1f64"
+        )]
+        fn _vrndm_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrndm_f64(a)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
-    static_assert_uimm_bits!(N, 5);
-    simd_extract!(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.floor.v2f64"
+        )]
+        fn _vrndmq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndmq_f64(a)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
-    static_assert_uimm_bits!(N, 6);
-    simd_extract!(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frintn.v1f64"
"llvm.aarch64.neon.frintn.v1f64" + )] + fn _vrndn_f64(a: float64x1_t) -> float64x1_t; + } + _vrndn_f64(a) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64) +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrnd_n_s64(a: i64) -> i32 { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(frintn))] +pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrn.i32")] - fn vqshrnd_n_s64_(a: i64, n: i32) -> i32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frintn.v2f64" + )] + fn _vrndnq_f64(a: float64x2_t) -> float64x2_t; } - vqshrnd_n_s64_(a, N) + _vrndnq_f64(a) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_s16) +#[doc = "Floating-point round to integral, to nearest with ties to even"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrnh_n_s16(a: i16) -> i8 { - static_assert!(N >= 1 && N <= 8); - simd_extract!(vqshrn_n_s16::(vdupq_n_s16(a)), 0) +#[cfg_attr(test, assert_instr(frintn))] +pub unsafe fn vrndns_f32(a: f32) -> f32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.roundeven.f32" + )] + fn _vrndns_f32(a: f32) -> f32; + } + _vrndns_f32(a) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32) +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrns_n_s32(a: i32) -> i16 { - static_assert!(N >= 1 && N <= 16); - simd_extract!(vqshrn_n_s32::(vdupq_n_s32(a)), 0) +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v2f32" + )] + fn _vrndp_f32(a: float32x2_t) -> float32x2_t; + } + _vrndp_f32(a) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16) +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc 
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, vqshrn_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v4f32" + )] + fn _vrndpq_f32(a: float32x4_t) -> float32x4_t; + } + _vrndpq_f32(a) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32) +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vqshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v1f64" + )] + fn _vrndp_f64(a: float64x1_t) -> float64x1_t; + } + _vrndp_f64(a) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64) +#[doc = "Floating-point round to integral, toward plus infinity"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vqshrn_n_s64::(b), [0, 1, 2, 3]) -} - -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_u64) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrnd_n_u64(a: u64) -> u32 { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(frintp))] +pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshrn.i32")] - fn vqshrnd_n_u64_(a: u64, n: i32) -> u32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.ceil.v2f64" + )] + fn _vrndpq_f64(a: 
     }
-    vqshrnd_n_u64_(a, N)
+    _vrndpq_f64(a)
 }
-/// Unsigned saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
-    static_assert!(N >= 1 && N <= 8);
-    simd_extract!(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.rint.v2f32"
+        )]
+        fn _vrndx_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrndx_f32(a)
 }
-/// Unsigned saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
-    static_assert!(N >= 1 && N <= 16);
-    simd_extract!(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.rint.v4f32"
+        )]
+        fn _vrndxq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrndxq_f32(a)
 }
-/// Unsigned saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.rint.v1f64"
+        )]
+        fn _vrndx_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrndx_f64(a)
 }
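The frint family above maps each IEEE rounding mode to a single instruction. A minimal sketch of how the modes differ on the same input lanes, assuming an AArch64 target with NEON; the helper name and sample values are illustrative and not part of the generated file:

```rust
use core::arch::aarch64::*;

/// For v = [2.5, -2.5, 1.5, -1.5]:
///   vrndq  (frintz, toward zero)           -> [2.0, -2.0, 1.0, -1.0]
///   vrndaq (frinta, ties away from zero)   -> [3.0, -3.0, 2.0, -2.0]
///   vrndnq (frintn, ties to even)          -> [2.0, -2.0, 2.0, -2.0]
///   vrndmq (frintm, toward minus infinity) -> [2.0, -3.0, 1.0, -2.0]
///   vrndpq (frintp, toward plus infinity)  -> [3.0, -2.0, 2.0, -1.0]
#[target_feature(enable = "neon")]
unsafe fn all_rounding_modes(v: float32x4_t) -> [float32x4_t; 5] {
    [vrndq_f32(v), vrndaq_f32(v), vrndnq_f32(v), vrndmq_f32(v), vrndpq_f32(v)]
}
```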
-/// Unsigned saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.rint.v2f64"
+        )]
+        fn _vrndxq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndxq_f64(a)
 }
-/// Unsigned saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(srshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
+pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.i64"
+        )]
+        fn _vrshld_s64(a: i64, b: i64) -> i64;
+    }
+    _vrshld_s64(a, b)
 }
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(urshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
-    static_assert!(N >= 1 && N <= 8);
-    simd_extract!(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
+pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urshl.i64"
+        )]
+        fn _vrshld_u64(a: i64, b: i64) -> i64;
+    }
+    _vrshld_u64(a.as_signed(), b).as_unsigned()
 }
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
-    static_assert!(N >= 1 && N <= 16);
-    simd_extract!(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
+pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    vrshld_s64(a, -N as i64)
 }
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
-    static_assert!(N >= 1 && N <= 32);
-    simd_extract!(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
+pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    vrshld_u64(a, -N as i64)
 }
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+    simd_shuffle!(
+        a,
+        vrshrn_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+    simd_shuffle!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
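A rounding right shift adds half of the discarded range before shifting; the `_high_` narrowing variants then truncate each lane to the narrow type and append the results to the low half passed in `a`. A hedged scalar model of one lane of `vrshrn_n_s16::<N>`, for intuition only:

```rust
/// One i16 lane of vrshrn_n_s16::<N>: add the rounding constant
/// 1 << (N - 1), shift right by N, then keep only the low 8 bits.
fn rshrn_lane_s16(x: i16, n: u32) -> i8 {
    debug_assert!((1..=8).contains(&n));
    (((x as i32 + (1 << (n - 1))) >> n) & 0xff) as i8
}
```

`vrshrn_high_n_s16` applies this to all eight lanes of `b` and places the narrowed bytes in lanes 8..16 of the result, with `a` occupying lanes 0..8.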
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
-}
-
-/// Unsigned saturating accumulate of signed value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 {
-    simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0)
-}
-
-/// Unsigned saturating accumulate of signed value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 {
-    simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0)
-}
-
-/// Unsigned saturating accumulate of signed value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.usqadd.i32")]
-        fn vsqadds_u32_(a: u32, b: i32) -> u32;
-    }
-    vsqadds_u32_(a, b)
-}
-
-/// Unsigned saturating accumulate of signed value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddd_u64(a: u64, b: i64) -> u64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.usqadd.i64")]
-        fn vsqaddd_u64_(a: u64, b: i64) -> u64;
-    }
-    vsqaddd_u64_(a, b)
-}
-
-/// Calculates the square root of each lane.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t {
-    simd_fsqrt(a)
+    simd_shuffle!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }
-/// Calculates the square root of each lane.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t {
-    simd_fsqrt(a)
+pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vrshrn_n_u16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-/// Calculates the square root of each lane.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t {
-    simd_fsqrt(a)
+pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Calculates the square root of each lane.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
-    simd_fsqrt(a)
+pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }
-/// Reciprocal square-root estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrte.v1f64")]
-        fn vrsqrte_f64_(a: float64x1_t) -> float64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v1f64"
+        )]
+        fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t;
     }
-    vrsqrte_f64_(a)
+    _vrsqrte_f64(a)
 }
-/// Reciprocal square-root estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrte.v2f64")]
-        fn vrsqrteq_f64_(a: float64x2_t) -> float64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v2f64"
+        )]
+        fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t;
     }
-    vrsqrteq_f64_(a)
+    _vrsqrteq_f64(a)
 }
-/// Reciprocal square-root estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrtes_f32(a: f32) -> f32 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vrsqrted_f64(a: f64) -> f64 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrte.f32")]
-        fn vrsqrtes_f32_(a: f32) -> f32;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.f64"
+        )]
+        fn _vrsqrted_f64(a: f64) -> f64;
     }
-    vrsqrtes_f32_(a)
+    _vrsqrted_f64(a)
 }
-/// Reciprocal square-root estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrted_f64(a: f64) -> f64 {
-    #[allow(improper_ctypes)]
+pub unsafe fn vrsqrtes_f32(a: f32) -> f32 {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrte.f64")]
-        fn vrsqrted_f64_(a: f64) -> f64;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.f32"
+        )]
+        fn _vrsqrtes_f32(a: f32) -> f32;
     }
-    vrsqrted_f64_(a)
+    _vrsqrtes_f32(a)
 }
-/// Floating-point reciprocal square root step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrts.v1f64")]
-        fn vrsqrts_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v1f64"
+        )]
+        fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
-    vrsqrts_f64_(a, b)
+    _vrsqrts_f64(a, b)
 }
-/// Floating-point reciprocal square root step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrts.v2f64")]
-        fn vrsqrtsq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v2f64"
+        )]
+        fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
-    vrsqrtsq_f64_(a, b)
+    _vrsqrtsq_f64(a, b)
 }
-/// Floating-point reciprocal square root step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
= "neon")] #[cfg_attr(test, assert_instr(frsqrts))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 { - #[allow(improper_ctypes)] +pub unsafe fn vrsqrtsd_f64(a: f64, b: f64) -> f64 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrts.f32")] - fn vrsqrtss_f32_(a: f32, b: f32) -> f32; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f64" + )] + fn _vrsqrtsd_f64(a: f64, b: f64) -> f64; } - vrsqrtss_f32_(a, b) + _vrsqrtsd_f64(a, b) } -/// Floating-point reciprocal square root step -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64) +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(frsqrts))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsqrtsd_f64(a: f64, b: f64) -> f64 { - #[allow(improper_ctypes)] +pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrts.f64")] - fn vrsqrtsd_f64_(a: f64, b: f64) -> f64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.f32" + )] + fn _vrsqrtss_f32(a: f32, b: f32) -> f32; } - vrsqrtsd_f64_(a, b) + _vrsqrtss_f32(a, b) } -/// Reciprocal estimate. -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64) +#[doc = "Signed rounding shift right and accumulate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frecpe))] +#[cfg_attr(test, assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpe.v1f64")] - fn vrecpe_f64_(a: float64x1_t) -> float64x1_t; - } - vrecpe_f64_(a) +pub unsafe fn vrsrad_n_s64(a: i64, b: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + let b: i64 = vrshrd_n_s64::(b); + a.wrapping_add(b) } -/// Reciprocal estimate. 
-/// Reciprocal estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f64)
+#[doc = "Unsigned rounding shift right and accumulate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpe.v2f64")]
-        fn vrecpeq_f64_(a: float64x2_t) -> float64x2_t;
-    }
-    vrecpeq_f64_(a)
+pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    let b: u64 = vrshrd_n_u64::<N>(b);
+    a.wrapping_add(b)
 }
-/// Reciprocal estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpes_f32)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpes_f32(a: f32) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpe.f32")]
-        fn vrecpes_f32_(a: f32) -> f32;
-    }
-    vrecpes_f32_(a)
+pub unsafe fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let x: int8x8_t = vrsubhn_s16(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-/// Reciprocal estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecped_f64)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecped_f64(a: f64) -> f64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpe.f64")]
-        fn vrecped_f64_(a: f64) -> f64;
-    }
-    vrecped_f64_(a)
+pub unsafe fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let x: int16x4_t = vrsubhn_s32(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Floating-point reciprocal step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f64)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecps.v1f64")]
-        fn vrecps_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    vrecps_f64_(a, b)
+pub unsafe fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let x: int32x2_t = vrsubhn_s64(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3])
 }
-/// Floating-point reciprocal step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f64)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecps.v2f64")]
-        fn vrecpsq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    vrecpsq_f64_(a, b)
+pub unsafe fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let x: uint8x8_t = vrsubhn_u16(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
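RSUBHN2 subtracts, rounds, and keeps the high half of each wide lane, appending the narrowed results to the vector passed in `a`. A hedged scalar model of one lane of `vrsubhn_u16` (plain Rust, illustrative only):

```rust
/// One u16 lane of vrsubhn_u16: wrapping subtract, add the rounding
/// constant 1 << 7 (half of the discarded low byte), keep bits 15..8.
fn rsubhn_lane_u16(b: u16, c: u16) -> u8 {
    let diff = b.wrapping_sub(c) as u32;
    (((diff + (1 << 7)) >> 8) & 0xff) as u8
}
```

`vrsubhn_high_u16` runs this over all eight lanes of `b` and `c` and writes the bytes into lanes 8..16 of the result, with `a` in lanes 0..8.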
-/// Floating-point reciprocal step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpss_f32)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecps.f32")]
-        fn vrecpss_f32_(a: f32, b: f32) -> f32;
-    }
-    vrecpss_f32_(a, b)
+pub unsafe fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let x: uint16x4_t = vrsubhn_u32(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Floating-point reciprocal step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsd_f64)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpsd_f64(a: f64, b: f64) -> f64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecps.f64")]
-        fn vrecpsd_f64_(a: f64, b: f64) -> f64;
-    }
-    vrecpsd_f64_(a, b)
+pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let x: uint32x2_t = vrsubhn_u64(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3])
 }
-/// Floating-point reciprocal exponent
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxs_f32)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpx))]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpxs_f32(a: f32) -> f32 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpx.f32")]
-        fn vrecpxs_f32_(a: f32) -> f32;
-    }
-    vrecpxs_f32_(a)
+pub unsafe fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
+    static_assert!(LANE == 0);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Floating-point reciprocal exponent
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxd_f64)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpx))]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpxd_f64(a: f64) -> f64 {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpx.f64")]
-        fn vrecpxd_f64_(a: f64) -> f64;
-    }
-    vrecpxd_f64_(a)
-}
+pub unsafe fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(b, LANE as u32, a)
+}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)
+#[doc = "SHA512 hash update part 2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h2))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512h2"
+        )]
+        fn _vsha512h2q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vsha512h2q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)
+#[doc = "SHA512 hash update part 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512h"
+        )]
+        fn _vsha512hq_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vsha512hq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)
+#[doc = "SHA512 schedule update 0"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su0))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su0"
+        )]
+        fn _vsha512su0q_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vsha512su0q_u64(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)
+#[doc = "SHA512 schedule update 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su1))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su1"
+        )]
+        fn _vsha512su1q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vsha512su1q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
+    transmute(vshl_s64(transmute(a), transmute(b)))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(ushl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    transmute(a)
+pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
+    transmute(vshl_u64(transmute(a), transmute(b)))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vshll_n_s8::<N>(b)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    vshll_n_s16::<N>(b)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    let b: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    vshll_n_s32::<N>(b)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)
+#[doc = "Unsigned shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vshll_n_u8::<N>(b)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)
+#[doc = "Unsigned shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    vshll_n_u16::<N>(b)
 }
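As the bodies above show, the `_high_` widening shifts are simply `vshll_n` applied to the upper half of the input: widening happens before the shift, so no bits are lost for N up to the lane width. A hedged scalar model of `vshll_high_n_u8::<N>` (plain Rust, illustrative only):

```rust
/// Model of vshll_high_n_u8::<N>: widen lanes 8..16 of a u8x16 to u16,
/// then shift left by n (0..=8).
fn shll_high_u8(a: [u8; 16], n: u32) -> [u16; 8] {
    debug_assert!(n <= 8);
    let mut out = [0u16; 8];
    for i in 0..8 {
        out[i] = (a[8 + i] as u16) << n;
    }
    out
}
```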
"Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t { - transmute(a) +pub unsafe fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 32); + let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + vshll_n_u32::(b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t { - transmute(a) +pub unsafe fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_shuffle!( + a, + vshrn_n_s16::(b), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t { - transmute(a) +pub unsafe fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_shuffle!(a, vshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t { - transmute(a) +pub unsafe fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_shuffle!(a, vshrn_n_s64::(b), [0, 1, 2, 3]) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's 
-/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t { - transmute(a) +pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_shuffle!( + a, + vshrn_n_u16::<N>(b), + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t { - transmute(a) +pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_shuffle!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t { - transmute(a) +pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_shuffle!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3]) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64) +#[doc = "Shift left and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t { - transmute(a) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] +pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N >= 0 && N <= 63); + transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) }
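Reviewer note (not part of the patch): a sketch of the SLI semantics, with arbitrary values — the shifted `b` is inserted over the high bits of `a`, while a's low `N` bits are preserved.

```
use core::arch::aarch64::vslid_n_s64;

#[target_feature(enable = "neon")]
unsafe fn sli_demo() -> i64 {
    // (0x1234 << 8) | (0xabff & 0xff) == 0x1234ff
    vslid_n_s64::<8>(0xabff, 0x1234)
}
```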
"## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t { - transmute(a) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] +pub unsafe fn vslid_n_u64(a: u64, b: u64) -> u64 { + static_assert!(N >= 0 && N <= 63); + transmute(vsli_n_u64::(transmute(a), transmute(b))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64) +#[doc = "SM3PARTW1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t { - transmute(a) +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub unsafe fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3partw1" + )] + fn _vsm3partw1q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + _vsm3partw1q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64) +#[doc = "SM3PARTW2"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t { - transmute(a) +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3partw2))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub unsafe fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3partw2" + )] + fn _vsm3partw2q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + _vsm3partw2q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64) +#[doc = "SM3SS1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { - transmute(a) +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3ss1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub unsafe fn vsm3ss1q_u32(a: 
-/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64) +#[doc = "SM3SS1"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t { - transmute(a) +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm3ss1))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub unsafe fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm3ss1" + )] + fn _vsm3ss1q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + _vsm3ss1q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64) +#[doc = "SM4 key"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t { - transmute(a) +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4ekey))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub unsafe fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm4ekey" + )] + fn _vsm4ekeyq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vsm4ekeyq_u32(a.as_signed(), b.as_signed()).as_unsigned() +} + +#[doc = "SM4 encode"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,sm4")] +#[cfg_attr(test, assert_instr(sm4e))] +#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] +pub unsafe fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.crypto.sm4e" + )] + fn _vsm4eq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vsm4eq_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64) +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t { - transmute(a) +pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 { + simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0) }
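Reviewer note (not part of the patch): an illustration of the USQADD saturation behaviour described above — a signed addend is added to an unsigned accumulator and the result is clamped to the unsigned range.

```
use core::arch::aarch64::vsqaddb_u8;

#[target_feature(enable = "neon")]
unsafe fn usqadd_demo() {
    assert_eq!(vsqaddb_u8(250, 10), u8::MAX); // 260 saturates to 255
    assert_eq!(vsqaddb_u8(5, -10), 0);        // -5 saturates to 0
}
```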
-/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64) +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t { - transmute(a) +pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 { + simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32) +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t { - transmute(a) +pub unsafe fn vsqaddd_u64(a: u64, b: i64) -> u64 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usqadd.i64" + )] + fn _vsqaddd_u64(a: i64, b: i64) -> i64; + } + _vsqaddd_u64(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64) +#[doc = "Unsigned saturating accumulate of signed value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(usqadd))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t { - transmute(a) +pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.usqadd.i32" + )] + fn _vsqadds_u32(a: i32, b: i32) -> i32; + } + _vsqadds_u32(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64) +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t { - transmute(a) +pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t { + simd_fsqrt(a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64) +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t { - transmute(a) +pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t { + simd_fsqrt(a) }
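Reviewer note (not part of the patch): `vsqrtq_f32` lowers straight to `fsqrt`, one square root per lane; a minimal sketch with arbitrary values.

```
use core::arch::aarch64::{vdupq_n_f32, vgetq_lane_f32, vsqrtq_f32};

#[target_feature(enable = "neon")]
unsafe fn sqrt_lane0() -> f32 {
    let v = vdupq_n_f32(9.0);
    vgetq_lane_f32::<0>(vsqrtq_f32(v)) // 3.0
}
```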
-/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32) +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t { - transmute(a) +pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t { + simd_fsqrt(a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64) +#[doc = "Calculates the square root of each lane."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(fsqrt))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t { - transmute(a) +pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t { + simd_fsqrt(a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64) +#[doc = "Shift right and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 { - transmute(a) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))] +pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { + static_assert!(N >= 1 && N <= 64); + transmute(vsri_n_s64::<N>(transmute(a), transmute(b))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8) +#[doc = "Shift right and insert"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t { - transmute(a) +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))] +pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { + static_assert!(N >= 1 && N <= 64); + transmute(vsri_n_u64::<N>(transmute(a), transmute(b))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64" + )] + fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64); + } + _vst1_f64_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64" + )] + fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64); + } + _vst1q_f64_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64" + )] + fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); + } + _vst1_f64_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64" + )] + fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, 
ptr: *mut f64); + } + _vst1q_f64_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64" + )] + fn _vst1_f64_x4( + a: float64x1_t, + b: float64x1_t, + c: float64x1_t, + d: float64x1_t, + ptr: *mut f64, + ); + } + _vst1_f64_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64" + )] + fn _vst1q_f64_x4( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + d: float64x2_t, + ptr: *mut f64, + ); + } + _vst1q_f64_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst1_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1_t) { + static_assert!(LANE == 0); + *a = simd_extract!(b, LANE as u32); }
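Reviewer note (not part of the patch): a sketch of the multi-register store form added earlier in this hunk, writing two d-registers to consecutive memory with a single `vst1` call; the pointer must be valid for two `f64` writes.

```
use core::arch::aarch64::{float64x1x2_t, vdup_n_f64, vst1_f64_x2};

#[target_feature(enable = "neon")]
unsafe fn store_pair(out: *mut f64) {
    let regs = float64x1x2_t(vdup_n_f64(1.0), vdup_n_f64(2.0));
    vst1_f64_x2(out, regs); // writes 1.0 to out[0] and 2.0 to out[1]
}
```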
-/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t { - transmute(a) +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v1f64.p0i8" + )] + fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8); + } + _vst2_f64(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) { + static_assert!(LANE == 0); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8" + )] + fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8); + } + _vst2_lane_f64(b.0, b.1, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) { + static_assert!(LANE == 0); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8" + )] + fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8); + } + _vst2_lane_s64(b.0, b.1, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// 
[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst2_lane_p64(a: *mut p64, b: poly64x1x2_t) { + static_assert!(LANE == 0); + vst2_lane_s64::(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { + static_assert!(LANE == 0); + vst2_lane_s64::(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t { - transmute(a) +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v2f64.p0i8" + )] + fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8); + } + _vst2q_f64(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t { - transmute(a) +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v2i64.p0i8" + )] + fn _vst2q_s64(a: 
int64x2_t, b: int64x2_t, ptr: *mut i8); + } + _vst2q_s64(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst2q_lane_f64(a: *mut f64, b: float64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8" + )] + fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_f64(b.0, b.1, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst2q_lane_s8(a: *mut i8, b: int8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8" + )] + fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t { - transmute(a) +pub unsafe fn vst2q_lane_s64(a: *mut i64, b: int64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8" + )] + fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8); + } + _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64) +#[doc = "Store multiple 2-element structures 
from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t { - transmute(a) +pub unsafe fn vst2q_lane_p64(a: *mut p64, b: poly64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2q_lane_s64::(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst2q_lane_u8(a: *mut u8, b: uint8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + vst2q_lane_s8::(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst2q_lane_u64(a: *mut u64, b: uint64x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + vst2q_lane_s64::(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst2q_lane_p8(a: *mut p8, b: poly8x16x2_t) { + static_assert_uimm_bits!(LANE, 4); + vst2q_lane_s8::(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t { - transmute(a) +pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) { + vst2q_s64(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t { - transmute(a) +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { + vst2q_s64(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t { - transmute(a) +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v1f64.p0i8" + )] + fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8); + } + _vst3_f64(b.0, b.1, b.2, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t { - transmute(a) +pub unsafe fn vst3_lane_f64(a: *mut f64, b: float64x1x3_t) { + static_assert!(LANE == 0); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8" + )] + fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8); + } + _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t { - transmute(a) +pub unsafe fn vst3_lane_s64(a: *mut i64, b: int64x1x3_t) { + static_assert!(LANE == 0); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8" + )] + fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { - transmute(a) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_p64(a: *mut p64, b: poly64x1x3_t) { + static_assert!(LANE == 0); + vst3_lane_s64::(transmute(a), transmute(b)) } -/// Signed rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.i64")] - fn vrshld_s64_(a: i64, b: i64) -> i64; - } - vrshld_s64_(a, b) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { + static_assert!(LANE == 0); + vst3_lane_s64::(transmute(a), transmute(b)) } -/// Unsigned rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 { 
- #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.i64")] - fn vrshld_u64_(a: u64, b: i64) -> u64; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v2f64.p0i8" + )] + fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8); } - vrshld_u64_(a, b) + _vst3q_f64(b.0, b.1, b.2, a as _) } -/// Signed rounding shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(srshr, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrd_n_s64(a: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - vrshld_s64(a, -N as i64) +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v2i64.p0i8" + )] + fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8); + } + _vst3q_s64(b.0, b.1, b.2, a as _) } -/// Unsigned rounding shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(urshr, N = 2))] -#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrd_n_u64(a: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - vrshld_u64(a, -N as i64) +pub unsafe fn vst3q_lane_f64(a: *mut f64, b: float64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8" + )] + fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Rounding shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, 
vrshrn_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +pub unsafe fn vst3q_lane_s8(a: *mut i8, b: int8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8" + )] + fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -/// Rounding shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vrshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +pub unsafe fn vst3q_lane_s64(a: *mut i64, b: int64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8" + )] + fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _) } -/// Rounding shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vrshrn_n_s64::(b), [0, 1, 2, 3]) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_p64(a: *mut p64, b: poly64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3q_lane_s64::(transmute(a), transmute(b)) } -/// Rounding shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_u8(a: *mut u8, b: uint8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + vst3q_lane_s8::(transmute(a), transmute(b)) +} + +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, vrshrn_n_u16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_u64(a: *mut u64, b: uint64x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3q_lane_s64::(transmute(a), transmute(b)) } -/// Rounding shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] #[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_p8(a: *mut p8, b: poly8x16x3_t) { + static_assert_uimm_bits!(LANE, 4); + vst3q_lane_s8::(transmute(a), transmute(b)) +} + +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vrshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) { + vst3q_s64(transmute(a), transmute(b)) } -/// Rounding shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rshrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vrshrn_n_u64::(b), [0, 1, 2, 3]) +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { + vst3q_s64(transmute(a), transmute(b)) } -/// Signed rounding shift right and accumulate. 
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    let b: i64 = vrshrd_n_s64::<N>(b);
-    a.wrapping_add(b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v1f64.p0i8"
+        )]
+        fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
+    }
+    _vst4_f64(b.0, b.1, b.2, b.3, a as _)
 }
-/// Unsigned rounding shift right and accumulate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    let b: u64 = vrshrd_n_u64::<N>(b);
-    a.wrapping_add(b)
+pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
+    static_assert!(LANE == 0);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8"
+        )]
+        fn _vst4_lane_f64(
+            a: float64x1_t,
+            b: float64x1_t,
+            c: float64x1_t,
+            d: float64x1_t,
+            n: i64,
+            ptr: *mut i8,
+        );
+    }
+    _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-/// Rounding subtract returning high narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x: int8x8_t = vrsubhn_s16(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
+    static_assert!(LANE == 0);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8"
+        )]
+        fn _vst4_lane_s64(
+            a: int64x1_t,
+            b: int64x1_t,
+            c: int64x1_t,
+            d: int64x1_t,
+            n: i64,
+            ptr: *mut i8,
+        );
+    }
+    _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
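// Illustrative sketch (editorial, not part of the generated patch): with
// one-element float64x1_t vectors there is only lane 0, which is why
// vst4_lane_f64 carries `static_assert!(LANE == 0)` above. The helper name
// and values are hypothetical, assuming an aarch64 target with NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn st4_lane_demo(dst: *mut f64) {
    use core::arch::aarch64::*;
    let q = float64x1x4_t(
        vdup_n_f64(1.0),
        vdup_n_f64(2.0),
        vdup_n_f64(3.0),
        vdup_n_f64(4.0),
    );
    // Writes 1.0, 2.0, 3.0, 4.0 to dst[0..4]; any LANE other than 0 is rejected at compile time.
    vst4_lane_f64::<0>(dst, q);
}
-///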
Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let x: int16x4_t = vrsubhn_s32(b, c); - simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_p64(a: *mut p64, b: poly64x1x4_t) { + static_assert!(LANE == 0); + vst4_lane_s64::(transmute(a), transmute(b)) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let x: int32x2_t = vrsubhn_s64(b, c); - simd_shuffle!(a, x, [0, 1, 2, 3]) +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { + static_assert!(LANE == 0); + vst4_lane_s64::(transmute(a), transmute(b)) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - let x: uint8x8_t = vrsubhn_u16(b, c); - simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v2f64.p0i8" + )] + fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8); + } + _vst4q_f64(b.0, b.1, b.2, b.3, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, 
assert_instr(rsubhn2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - let x: uint16x4_t = vrsubhn_u32(b, c); - simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v2i64.p0i8" + )] + fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8); + } + _vst4q_s64(b.0, b.1, b.2, b.3, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(rsubhn2))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - let x: uint32x2_t = vrsubhn_u64(b, c); - simd_shuffle!(a, x, [0, 1, 2, 3]) +pub unsafe fn vst4q_lane_f64(a: *mut f64, b: float64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8" + )] + fn _vst4q_lane_f64( + a: float64x2_t, + b: float64x2_t, + c: float64x2_t, + d: float64x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vset_lane_f64(a: f64, b: float64x1_t) -> float64x1_t { - static_assert!(LANE == 0); - simd_insert!(b, LANE as u32, a) +pub unsafe fn vst4q_lane_s8(a: *mut i8, b: int8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8" + )] + fn _vst4q_lane_s8( + a: int8x16_t, + b: int8x16_t, + c: int8x16_t, + d: int8x16_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] -#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsetq_lane_f64(a: f64, b: float64x2_t) -> float64x2_t { +pub unsafe fn vst4q_lane_s64(a: *mut i64, b: int64x2x4_t) { static_assert_uimm_bits!(LANE, 1); - simd_insert!(b, LANE as u32, a) + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8" + )] + fn _vst4q_lane_s64( + a: int64x2_t, + b: int64x2_t, + c: int64x2_t, + d: int64x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 { - transmute(vshl_s64(transmute(a), transmute(b))) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_p64(a: *mut p64, b: poly64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4q_lane_s64::(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 { - transmute(vshl_u64(transmute(a), transmute(b))) +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_u8(a: *mut u8, b: uint8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + vst4q_lane_s8::(transmute(a), transmute(b)) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { - static_assert!(N >= 0 && N <= 8); - let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vshll_n_s8::(b) +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_u64(a: *mut u64, b: uint64x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4q_lane_s64::(transmute(a), transmute(b)) } -/// Signed shift left long -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 16); - let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vshll_n_s16::(b) +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_p8(a: *mut p8, b: poly8x16x4_t) { + static_assert_uimm_bits!(LANE, 4); + vst4q_lane_s8::(transmute(a), transmute(b)) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sshll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 32); - let b: int32x2_t = simd_shuffle!(a, a, [2, 3]); - vshll_n_s32::(b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) { + vst4q_s64(transmute(a), transmute(b)) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { - static_assert!(N >= 0 && N <= 8); - let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); - vshll_n_u8::(b) +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) { + vst4q_s64(transmute(a), transmute(b)) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ushll2, N = 2))] -#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 16); - let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); - vshll_n_u16::(b) +#[cfg_attr(test, assert_instr(fsub))] +pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> 
float64x1_t {
+    simd_sub(a, b)
 }
-/// Signed shift left long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    vshll_n_u32::<N>(b)
+#[cfg_attr(test, assert_instr(fsub))]
+pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_sub(a, b)
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(a, vshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
+    a.wrapping_sub(b)
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
+    a.wrapping_sub(b)
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(ssubl))]
+pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let d: int16x8_t = simd_cast(c);
+    let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let f: int16x8_t = simd_cast(e);
+    simd_sub(d, f)
 }
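// Illustrative sketch (editorial, not part of the generated patch): vsubd_s64
// wraps on overflow (note the `wrapping_sub` above), while vsubl_high_s8
// widens the high eight lanes to 16 bits before subtracting, so the
// difference cannot overflow. Hypothetical helper, assuming aarch64 with NEON.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn sub_demo() {
    use core::arch::aarch64::*;
    // Scalar subtract wraps: i64::MIN - 1 becomes i64::MAX.
    assert_eq!(vsubd_s64(i64::MIN, 1), i64::MAX);
    // Widening subtract of the high halves: (-128) - 127 = -255 fits in i16.
    let wide: int16x8_t = vsubl_high_s8(vdupq_n_s8(-128), vdupq_n_s8(127));
    assert_eq!(vgetq_lane_s16::<0>(wide), -255);
}
-/// Shift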
right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16) +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_shuffle!(a, vshrn_n_u16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(test, assert_instr(ssubl))] +pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + let d: int32x4_t = simd_cast(c); + let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let f: int32x4_t = simd_cast(e); + simd_sub(d, f) } -/// Shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32) +#[doc = "Signed Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_shuffle!(a, vshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(test, assert_instr(ssubl))] +pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + let c: int32x2_t = simd_shuffle!(a, a, [2, 3]); + let d: int64x2_t = simd_cast(c); + let e: int32x2_t = simd_shuffle!(b, b, [2, 3]); + let f: int64x2_t = simd_cast(e); + simd_sub(d, f) } -/// Shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64) +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(shrn2, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_shuffle!(a, vshrn_n_u64::(b), [0, 1, 2, 3]) +#[cfg_attr(test, assert_instr(usubl))] +pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: uint16x8_t = simd_cast(c); + let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let f: uint16x8_t = simd_cast(e); + simd_sub(d, f) } -/// SM3PARTW1 -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32) +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, 
assert_instr(sm3partw1))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub unsafe fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sm3partw1")] - fn vsm3partw1q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - vsm3partw1q_u32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usubl))] +pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]); + let d: uint32x4_t = simd_cast(c); + let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + let f: uint32x4_t = simd_cast(e); + simd_sub(d, f) } -/// SM3PARTW2 -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32) +#[doc = "Unsigned Subtract Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3partw2))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub unsafe fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sm3partw2")] - fn vsm3partw2q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - vsm3partw2q_u32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usubl))] +pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]); + let d: uint64x2_t = simd_cast(c); + let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]); + let f: uint64x2_t = simd_cast(e); + simd_sub(d, f) } -/// SM3SS1 -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32) +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm3ss1))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub unsafe fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sm3ss1")] - fn vsm3ss1q_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t; - } - vsm3ss1q_u32_(a, b, c) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ssubw))] +pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_sub(a, simd_cast(c)) } -/// SM4 key -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32) +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm4ekey))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub unsafe fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sm4ekey")] - fn vsm4ekeyq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } - vsm4ekeyq_u32_(a, b) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ssubw))] +pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); + simd_sub(a, simd_cast(c)) } -/// SM4 encode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32) +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sm4")] -#[cfg_attr(test, assert_instr(sm4e))] -#[unstable(feature = "stdarch_neon_sm4", issue = "117226")] -pub unsafe fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sm4e")] - fn vsm4eq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } - vsm4eq_u32_(a, b) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ssubw))] +pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let c: int32x2_t = simd_shuffle!(b, b, [2, 3]); + simd_sub(a, simd_cast(c)) } -/// Rotate and exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64) +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(rax1))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub unsafe fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.rax1")] - fn vrax1q_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - } - vrax1q_u64_(a, b) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(usubw))] +pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_sub(a, simd_cast(c)) } -/// SHA512 hash update part 1 -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64) +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512h))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sha512h")]
-        fn vsha512hq_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
-    }
-    vsha512hq_u64_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(usubw))]
+pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    simd_sub(a, simd_cast(c))
 }
-/// SHA512 hash update part 2
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512h2))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sha512h2")]
-        fn vsha512h2q_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t;
-    }
-    vsha512h2q_u64_(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(usubw))]
+pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    simd_sub(a, simd_cast(c))
 }
-/// SHA512 schedule update 0
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512su0))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sha512su0")]
-        fn vsha512su0q_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
-    }
-    vsha512su0q_u64_(a, b)
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub unsafe fn vsudot_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int8x8_t,
+    c: uint8x16_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = transmute(c);
+    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vusdot_s32(a, transmute(c), b)
 }
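// Illustrative sketch (editorial, not part of the generated patch):
// vsudot_laneq_s32 multiplies the signed bytes of `b` by one unsigned 4-byte
// group of `c` (chosen by LANE) and accumulates four products into each
// 32-bit lane of `a`. Hypothetical helper; it needs nightly's
// `stdarch_neon_i8mm` feature and an aarch64 target with NEON and i8mm.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,i8mm")]
unsafe fn sudot_demo() {
    use core::arch::aarch64::*;
    let acc = vdup_n_s32(0);
    let b = vdup_n_s8(-1); // signed multiplicands
    let c = vdupq_n_u8(2); // unsigned multiplicands
    // Each output lane accumulates four products: 0 + 4 * (-1 * 2) = -8.
    let r = vsudot_laneq_s32::<0>(acc, b, c);
    assert_eq!(vget_lane_s32::<0>(r), -8);
}
-/// SHA512 schedule update 1
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)
+#[doc = "Dot product index form with signed and unsigned integers"]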
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,sha3")] -#[cfg_attr(test, assert_instr(sha512su1))] -#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")] -pub unsafe fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.crypto.sha512su1")] - fn vsha512su1q_u64_(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t; - } - vsha512su1q_u64_(a, b, c) +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(test, assert_instr(sudot, LANE = 3))] +#[rustc_legacy_const_generics(3)] +#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] +pub unsafe fn vsudotq_laneq_s32( + a: int32x4_t, + b: int8x16_t, + c: uint8x16_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = transmute(c); + let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vusdotq_s32(a, transmute(c), b) } -/// Floating-point round to 32-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32x_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint32x.v2f32")] - fn vrnd32x_f32_(a: float32x2_t) -> float32x2_t; - } - vrnd32x_f32_(a) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 32-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint32x.v4f32")] - fn vrnd32xq_f32_(a: float32x4_t) -> float32x4_t; - } - vrnd32xq_f32_(a) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 32-bit integer, 
using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint32x.v2f64")] - fn vrnd32xq_f64_(a: float64x2_t) -> float64x2_t; - } - vrnd32xq_f64_(a) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 32-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.frint32x.f64")] - fn vrnd32x_f64_(a: f64) -> f64; - } - transmute(vrnd32x_f64_(simd_extract!(a, 0))) -} - -/// Floating-point round to 32-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32z_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint32z.v2f32")] - fn vrnd32z_f32_(a: float32x2_t) -> float32x2_t; - } - vrnd32z_f32_(a) -} - -/// Floating-point round to 32-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint32z.v4f32")] - fn vrnd32zq_f32_(a: float32x4_t) -> float32x4_t; - } - vrnd32zq_f32_(a) -} - -/// Floating-point round to 32-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64) -#[inline] -#[target_feature(enable = 
"neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint32z.v2f64")] - fn vrnd32zq_f64_(a: float64x2_t) -> float64x2_t; - } - vrnd32zq_f64_(a) -} - -/// Floating-point round to 32-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd32z_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.frint32z.f64")] - fn vrnd32z_f64_(a: f64) -> f64; - } - transmute(vrnd32z_f64_(simd_extract!(a, 0))) -} - -/// Floating-point round to 64-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64x_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint64x.v2f32")] - fn vrnd64x_f32_(a: float32x2_t) -> float32x2_t; - } - vrnd64x_f32_(a) -} - -/// Floating-point round to 64-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint64x.v4f32")] - fn vrnd64xq_f32_(a: float32x4_t) -> float32x4_t; - } - vrnd64xq_f32_(a) -} - -/// Floating-point round to 64-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint64x.v2f64")] - fn vrnd64xq_f64_(a: float64x2_t) -> float64x2_t; - } - vrnd64xq_f64_(a) -} - -/// Floating-point round to 64-bit integer, using current rounding mode -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64) -#[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))] -#[unstable(feature = "stdarch_neon_ftts", issue 
= "117227")] -pub unsafe fn vrnd64x_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.frint64x.f64")] - fn vrnd64x_f64_(a: f64) -> f64; - } - transmute(vrnd64x_f64_(simd_extract!(a, 0))) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 64-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64z_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint64z.v2f32")] - fn vrnd64z_f32_(a: float32x2_t) -> float32x2_t; - } - vrnd64z_f32_(a) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 64-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint64z.v4f32")] - fn vrnd64zq_f32_(a: float32x4_t) -> float32x4_t; - } - vrnd64zq_f32_(a) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 64-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - 
#[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frint64z.v2f64")] - fn vrnd64zq_f64_(a: float64x2_t) -> float64x2_t; - } - vrnd64zq_f64_(a) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Floating-point round to 64-bit integer toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,frintts")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))] -#[unstable(feature = "stdarch_neon_ftts", issue = "117227")] -pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.frint64z.f64")] - fn vrnd64z_f64_(a: f64) -> f64; - } - transmute(vrnd64z_f64_(simd_extract!(a, 0))) +#[target_feature(enable = "neon")] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] +pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle!(a, b, [0, 4, 2, 6]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), 
assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { simd_shuffle!(a, b, [0, 4, 2, 6]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { simd_shuffle!(a, b, [0, 4, 2, 6]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { simd_shuffle!(a, b, [0, 4, 2, 6]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_shuffle!(a, b, [0, 4, 2, 6]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))] pub unsafe fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]) + simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ) } -/// Transpose vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16) +#[doc = "Transpose vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 pub unsafe fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     simd_shuffle!(a, b, [0, 4, 2, 6])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 pub unsafe fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s32)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_s64)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_u32)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_u64)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_p64)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f32)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle!(a, b, [0, 4, 2, 6])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f64)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_shuffle!(a, b, [1, 5, 3, 7])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+    simd_shuffle!(
+        a,
+        b,
+        [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+    )
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     simd_shuffle!(a, b, [1, 5, 3, 7])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     simd_shuffle!(a, b, [1, 5, 3, 7])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+    simd_shuffle!(
+        a,
+        b,
+        [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+    )
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     simd_shuffle!(a, b, [1, 5, 3, 7])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     simd_shuffle!(a, b, [1, 5, 3, 7])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    simd_shuffle!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31])
+    simd_shuffle!(
+        a,
+        b,
+        [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]
+    )
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     simd_shuffle!(a, b, [1, 5, 3, 7])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)
+#[doc = "Transpose vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
 pub unsafe fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15])
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_s32)
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle!(a, b, [1, 3])
+pub unsafe fn vtst_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t {
+    let c: int64x1_t = simd_and(a, b);
+    let d: i64x1 = i64x1::new(0);
+    simd_ne(c, transmute(d))
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_s64)
+#[doc = "Signed compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle!(a, b, [1, 3])
+pub unsafe fn vtstq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
+    let c: int64x2_t = simd_and(a, b);
+    let d: i64x2 = i64x2::new(0, 0);
+    simd_ne(c, transmute(d))
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_u32)
+#[doc = "Compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_shuffle!(a, b, [1, 3])
+pub unsafe fn vtst_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t {
+    let c: poly64x1_t = simd_and(a, b);
+    let d: i64x1 = i64x1::new(0);
+    simd_ne(c, transmute(d))
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_u64)
+#[doc = "Compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_shuffle!(a, b, [1, 3])
-}
-
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_p64)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle!(a, b, [1, 3])
+pub unsafe fn vtstq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
+    let c: poly64x2_t = simd_and(a, b);
+    let d: i64x2 = i64x2::new(0, 0);
+    simd_ne(c, transmute(d))
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f32)
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle!(a, b, [1, 5, 3, 7])
+pub unsafe fn vtst_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    let c: uint64x1_t = simd_and(a, b);
+    let d: u64x1 = u64x1::new(0);
+    simd_ne(c, transmute(d))
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f32)
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(test, assert_instr(cmtst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle!(a, b, [1, 3])
+pub unsafe fn vtstq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    let c: uint64x2_t = simd_and(a, b);
+    let d: u64x2 = u64x2::new(0, 0);
+    simd_ne(c, transmute(d))
 }
-/// Transpose vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f64)
+#[doc = "Compare bitwise test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(test, assert_instr(tst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle!(a, b, [1, 3])
+pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 {
+    transmute(vtst_s64(transmute(a), transmute(b)))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)
+#[doc = "Compare bitwise test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstd_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(test, assert_instr(tst))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+pub unsafe fn vtstd_u64(a: u64, b: u64) -> u64 {
+    transmute(vtst_u64(transmute(a), transmute(b)))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+pub unsafe fn vuqaddb_s8(a: i8, b: u8) -> i8 {
+    simd_extract!(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle!(a, b, [0, 4, 1, 5])
+pub unsafe fn vuqaddh_s16(a: i16, b: u16) -> i16 {
+    simd_extract!(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+pub unsafe fn vuqaddd_s64(a: i64, b: u64) -> i64 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.i64"
+        )]
+        fn _vuqaddd_s64(a: i64, b: i64) -> i64;
+    }
+    _vuqaddd_s64(a, b.as_signed())
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)
+#[doc = "Signed saturating accumulate of unsigned value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadds_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle!(a, b, [0, 2])
+pub unsafe fn vuqadds_s32(a: i32, b: u32) -> i32 {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.i32"
+        )]
+        fn _vuqadds_s32(a: i32, b: i32) -> i32;
+    }
+    _vuqadds_s32(a, b.as_signed())
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle!(a, b, [0, 4, 1, 5])
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(test, assert_instr(usdot, LANE = 3))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub unsafe fn vusdot_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: uint8x8_t,
+    c: int8x16_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = transmute(c);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vusdot_s32(a, b, transmute(c))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(test, assert_instr(usdot, LANE = 3))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub unsafe fn vusdotq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: uint8x16_t,
+    c: int8x16_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = transmute(c);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vusdotq_s32(a, b, transmute(c))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_shuffle!(a, b, [0, 4, 1, 5])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_shuffle!(a, b, [0, 4, 1, 5])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     simd_shuffle!(a, b, [0, 2])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    simd_shuffle!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    simd_shuffle!(a, b, [0, 4, 1, 5])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_shuffle!(
+        a,
+        b,
+        [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+    )
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle!(a, b, [0, 4, 1, 5])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle!(a, b, [0, 2])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_shuffle!(
+        a,
+        b,
+        [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+    )
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle!(a, b, [2, 6, 3, 7])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_shuffle!(a, b, [1, 3])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    simd_shuffle!(
+        a,
+        b,
+        [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
+    )
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle!(a, b, [2, 6, 3, 7])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_shuffle!(a, b, [1, 3])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_shuffle!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_shuffle!(a, b, [2, 6, 3, 7])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_shuffle!(a, b, [2, 6, 3, 7])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     simd_shuffle!(a, b, [1, 3])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    simd_shuffle!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31])
-}
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
+}
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    simd_shuffle!(a, b, [2, 6, 3, 7])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_shuffle!(
+        a,
+        b,
+        [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+    )
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    simd_shuffle!(a, b, [1, 3])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_shuffle!(a, b, [1, 3])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_shuffle!(a, b, [2, 6, 3, 7])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_shuffle!(a, b, [1, 3])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_shuffle!(
+        a,
+        b,
+        [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+    )
 }
-/// Unzip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7])
 }
-/// Unzip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s8)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
 }
-/// Unzip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_shuffle!(a, b, [0, 2, 4, 6])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7])
 }
-/// Unzip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s16)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
 }
-/// Unzip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s32)
+#[doc = "Unzip vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_shuffle!(a, b, [0, 2, 4, 6])
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u8) +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle!(a, b, [1, 3, 5, 7]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u8) +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] +pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle!(a, b, [0, 2, 4, 6]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) } -/// Unzip vectors -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle!(a, b, [0, 2, 4, 6]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_p16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle!(a, b, [0, 2, 4, 6]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_s32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle!(a, b, [0, 2]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_u32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_u64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle!(a, b, [0, 2]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -/// Unzip vectors -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_p64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle!(a, b, [0, 2]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle!(a, b, [0, 2, 4, 6]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle!(a, b, [0, 2]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), 
assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_shuffle!(a, b, [1, 3, 5, 7]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_shuffle!(a, b, [1, 3, 5, 7]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1_p16(a: 
poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle!(a, b, [0, 4, 1, 5]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))] +pub unsafe fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_shuffle!(a, b, [0, 2]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_shuffle!(a, b, [1, 3, 5, 7]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_shuffle!(a, b, [1, 3]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u32) +#[doc = "Zip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_shuffle!(a, b, [1, 3, 5, 7]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_shuffle!(a, b, [1, 3]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_p16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_shuffle!(a, b, [1, 3, 5, 7]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_s32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { simd_shuffle!(a, b, [1, 3]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_shuffle!(a, b, [1, 3]) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_u32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { simd_shuffle!(a, b, [1, 3]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_u64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_shuffle!(a, b, [1, 3]) -} - -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_p64) -#[inline] -#[target_feature(enable = "neon")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_p64(a: 
poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_shuffle!(a, b, [1, 3]) -} - -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_shuffle!(a, b, [1, 3, 5, 7]) +pub unsafe fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_shuffle!(a, b, [1, 3]) -} - -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f64) -#[inline] -#[target_feature(enable = "neon")] #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - simd_shuffle!(a, b, [1, 3]) -} - -/// Unsigned Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t { - let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: uint8x8_t = vabd_u8(d, e); - simd_add(a, simd_cast(f)) -} - -/// Unsigned Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t { - let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let e: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - let f: uint16x4_t = vabd_u16(d, e); - simd_add(a, simd_cast(f)) -} - -/// Unsigned Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t { - let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]); - let e: uint32x2_t = simd_shuffle!(c, c, [2, 3]); - let f: uint32x2_t = vabd_u32(d, e); - simd_add(a, simd_cast(f)) -} - -/// Signed Absolute difference 
and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t { - let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); - let e: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]); - let f: int8x8_t = vabd_s8(d, e); - let f: uint8x8_t = simd_cast(f); - simd_add(a, simd_cast(f)) -} - -/// Signed Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t { - let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]); - let e: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]); - let f: int16x4_t = vabd_s16(d, e); - let f: uint16x4_t = simd_cast(f); - simd_add(a, simd_cast(f)) -} - -/// Signed Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t { - let d: int32x2_t = simd_shuffle!(b, b, [2, 3]); - let e: int32x2_t = simd_shuffle!(c, c, [2, 3]); - let f: int32x2_t = vabd_s32(d, e); - let f: uint32x2_t = simd_cast(f); - simd_add(a, simd_cast(f)) +pub unsafe fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v1i64")] - fn vqabs_s64_(a: int64x1_t) -> int64x1_t; - } - vqabs_s64_(a) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v2i64")] - fn vqabsq_s64_(a: int64x2_t) -> int64x2_t; - } - vqabsq_s64_(a) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } -/// Signed saturating absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsb_s8) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqabsb_s8(a: i8) -> i8 { - simd_extract!(vqabs_s8(vdup_n_s8(a)), 0) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_shuffle!(a, b, [1, 3]) } -/// Signed saturating absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqabsh_s16(a: i16) -> i16 { - simd_extract!(vqabs_s16(vdup_n_s16(a)), 0) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) } -/// Signed saturating absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqabss_s32(a: i32) -> i32 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.i32")] - fn vqabss_s32_(a: i32) -> i32; - } - vqabss_s32_(a) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_shuffle!(a, b, [1, 3]) } -/// Signed saturating absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsd_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))] #[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqabsd_s64(a: i64) -> i64 { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.i64")] - fn vqabsd_s64_(a: i64) -> i64; - } - vqabsd_s64_(a) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } -/// Shift left and insert -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { - static_assert!(N >= 0 && N <= 63); - transmute(vsli_n_s64::<N>(transmute(a), transmute(b))) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ) } -/// Shift left and insert -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { - static_assert!(N >= 0 && N <= 63); - transmute(vsli_n_u64::<N>(transmute(a), transmute(b))) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_shuffle!(a, b, [2, 6, 3, 7]) } -/// Shift right and insert -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 { - static_assert!(N >= 1 && N <= 64); - transmute(vsri_n_s64::<N>(transmute(a), transmute(b))) +#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))] +pub unsafe fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) } -/// Shift right and insert -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64) +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline]
#[target_feature(enable = "neon")] -#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))] -#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 { - static_assert!(N >= 1 && N <= 64); - transmute(vsri_n_u64::<N>(transmute(a), transmute(b))) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::core_arch::simd::*; - use std::mem::transmute; - use stdarch_test::simd_test; - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let c: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(veor3q_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let c: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(veor3q_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let c: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(veor3q_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let c: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(veor3q_s64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let c: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(veor3q_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let c: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(veor3q_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - -
#[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let c: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(veor3q_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_veor3q_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let c: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(veor3q_u64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_f64() { - let a: f64 = 1.0; - let b: f64 = 9.0; - let e: f64 = 8.0; - let r: f64 = transmute(vabd_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_f64() { - let a: f64x2 = f64x2::new(1.0, 2.0); - let b: f64x2 = f64x2::new(9.0, 3.0); - let e: f64x2 = f64x2::new(8.0, 1.0); - let r: f64x2 = transmute(vabdq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabds_f32() { - let a: f32 = 1.0; - let b: f32 = 9.0; - let e: f32 = 8.0; - let r: f32 = vabds_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdd_f64() { - let a: f64 = 1.0; - let b: f64 = 9.0; - let e: f64 = 8.0; - let r: f64 = vabdd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10); - let e: u16x8 = u16x8::new(1, 0, 1, 2, 3, 4, 5, 6); - let r: u16x8 = transmute(vabdl_high_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 8, 9, 11, 12); - let b: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: u32x4 = u32x4::new(2, 1, 1, 2); - let r: u32x4 = transmute(vabdl_high_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(10, 10, 10, 10); - let e: u64x2 = u64x2::new(7, 6); - let r: u64x2 = transmute(vabdl_high_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10); - let e: i16x8 = i16x8::new(1, 0, 1, 2, 3, 4, 5, 6); - let r: i16x8 = transmute(vabdl_high_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12); - let b: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: i32x4 = i32x4::new(1, 0, 1, 2); - let r: i32x4 = transmute(vabdl_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_high_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(10, 10, 10, 10); - let e: i64x2 = i64x2::new(7, 6); - let r: i64x2 = transmute(vabdl_high_s32(transmute(a), 
transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_u64() { - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u64() { - let a: u64x2 = u64x2::new(0, 0x01); - let b: u64x2 = u64x2::new(0, 0x01); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u64x2 = u64x2::new(0, 0); - let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_p64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_p64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x01); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i64x2 = i64x2::new(-9223372036854775808, -9223372036854775808); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vceq_f64() { - let a: f64 = 1.2; - let b: f64 = 1.2; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_f64() { - let a: f64x2 = f64x2::new(1.2, 3.4); - let b: f64x2 = f64x2::new(1.2, 3.4); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqd_s64() { - let a: i64 = 1; - let b: i64 = 2; - let e: u64 = 0; - let r: u64 = vceqd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqd_u64() { - let a: u64 = 1; - let b: u64 = 2; - let e: u64 = 0; - let r: u64 = vceqd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqs_f32() { - let a: f32 = 1.; - let b: f32 = 2.; - let e: u32 = 0; - let r: u32 = vceqs_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqd_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: u64 = 0; - let r: u64 = vceqd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vceqz_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vceqzq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s16() { - let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); - let r: u16x4 = transmute(vceqz_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vceqzq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x00); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceqz_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); - let r: u32x4 = transmute(vceqzq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vceqz_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqzq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_p8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = 
transmute(vceqz_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_p8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vceqzq_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_p64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vceqz_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_p64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqzq_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vceqz_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vceqzq_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0); - let r: u16x4 = transmute(vceqz_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vceqzq_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceqz_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); - let r: u32x4 = transmute(vceqzq_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_u64() { - let a: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceqz_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_u64() { - let a: u64x2 = u64x2::new(0, 0x00); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vceqzq_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqz_f32() { - let a: f32x2 = f32x2::new(0.0, 1.2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceqz_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_f32() { - let a: f32x4 = f32x4::new(0.0, 1.2, 3.4, 5.6); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0); - let r: u32x4 = transmute(vceqzq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vceqz_f64() { - let a: f64 = 0.0; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vceqz_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzq_f64() { - let a: f64x2 = f64x2::new(0.0, 1.2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vceqzq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzd_s64() { - let a: i64 = 1; - let e: u64 = 0; - let r: u64 = vceqzd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzd_u64() { - let a: u64 = 1; - let e: u64 = 0; - let r: u64 = vceqzd_u64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzs_f32() { - let a: f32 = 1.; - let e: u32 = 0; - let r: u32 = vceqzs_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqzd_f64() { - let a: f64 = 1.; - let e: u64 = 0; - let r: u64 = vceqzd_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vtst_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vtstq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_p64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let b: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vtst_p64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_p64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let b: i64x2 = i64x2::new(-9223372036854775808, 0x00); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vtstq_p64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_u64() { - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vtst_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u64() { - let a: u64x2 = u64x2::new(0, 0x00); - let b: u64x2 = u64x2::new(0, 0x00); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vtstq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstd_s64() { - let a: i64 = 0; - let b: i64 = 0; - let e: u64 = 0; - let r: u64 = vtstd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstd_u64() { - let a: u64 = 0; - let b: u64 = 0; - let e: u64 = 0; - let r: u64 = vtstd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuqadds_s32() { - let a: i32 = 1; - let b: u32 = 1; - let e: i32 = 2; - let r: i32 = vuqadds_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuqaddd_s64() { - let a: i64 = 1; - let b: u64 = 1; - let e: i64 = 2; - let r: i64 = vuqaddd_s64(a, b); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vuqaddb_s8() { - let a: i8 = 1; - let b: u8 = 2; - let e: i8 = 3; - let r: i8 = vuqaddb_s8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuqaddh_s16() { - let a: i16 = 1; - let b: u16 = 2; - let e: i16 = 3; - let r: i16 = vuqaddh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabs_f64() { - let a: f64 = -0.1; - let e: f64 = 0.1; - let r: f64 = transmute(vabs_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabsq_f64() { - let a: f64x2 = f64x2::new(-0.1, -2.2); - let e: f64x2 = f64x2::new(0.1, 2.2); - let r: f64x2 = transmute(vabsq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x2 = i64x2::new(0, 1); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u64() { - let a: u64x1 = u64x1::new(1); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(0, 1); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_f64() { - let a: f64 = 1.2; - let b: f64 = 0.1; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_f64() { - let a: f64x2 = f64x2::new(1.2, 2.3); - let b: f64x2 = f64x2::new(0.1, 1.2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtd_s64() { - let a: i64 = 1; - let b: i64 = 2; - let e: u64 = 0; - let r: u64 = vcgtd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtd_u64() { - let a: u64 = 1; - let b: u64 = 2; - let e: u64 = 0; - let r: u64 = vcgtd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgts_f32() { - let a: f32 = 1.; - let b: f32 = 2.; - let e: u32 = 0; - let r: u32 = vcgts_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtd_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: u64 = 0; - let r: u64 = vcgtd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_s64() { - let a: i64x1 = i64x1::new(0); - let b: i64x1 = i64x1::new(1); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_s64() { - let a: i64x2 = 
i64x2::new(0, 1); - let b: i64x2 = i64x2::new(1, 2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_u64() { - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u64() { - let a: u64x2 = u64x2::new(0, 1); - let b: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_f64() { - let a: f64 = 0.1; - let b: f64 = 1.2; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_f64() { - let a: f64x2 = f64x2::new(0.1, 1.2); - let b: f64x2 = f64x2::new(1.2, 2.3); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltd_s64() { - let a: i64 = 2; - let b: i64 = 1; - let e: u64 = 0; - let r: u64 = vcltd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltd_u64() { - let a: u64 = 2; - let b: u64 = 1; - let e: u64 = 0; - let r: u64 = vcltd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclts_f32() { - let a: f32 = 2.; - let b: f32 = 1.; - let e: u32 = 0; - let r: u32 = vclts_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltd_f64() { - let a: f64 = 2.; - let b: f64 = 1.; - let e: u64 = 0; - let r: u64 = vcltd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_s64() { - let a: i64x1 = i64x1::new(0); - let b: i64x1 = i64x1::new(1); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s64() { - let a: i64x2 = i64x2::new(0, 1); - let b: i64x2 = i64x2::new(1, 2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcged_s64() { - let a: i64 = 1; - let b: i64 = 2; - let e: u64 = 0; - let r: u64 = vcged_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcged_u64() { - let a: u64 = 1; - let b: u64 = 2; - let e: u64 = 0; - let r: u64 = vcged_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcges_f32() { - let a: f32 = 1.; - let b: f32 = 2.; - let e: u32 = 0; - let r: u32 = vcges_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcged_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: u64 = 0; - let r: u64 = vcged_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_u64() { - let a: u64x1 = u64x1::new(0); - let b: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - 
let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u64() { - let a: u64x2 = u64x2::new(0, 1); - let b: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcleq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_f64() { - let a: f64 = 0.1; - let b: f64 = 1.2; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_f64() { - let a: f64x2 = f64x2::new(0.1, 1.2); - let b: f64x2 = f64x2::new(1.2, 2.3); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcled_s64() { - let a: i64 = 2; - let b: i64 = 1; - let e: u64 = 0; - let r: u64 = vcled_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcled_u64() { - let a: u64 = 2; - let b: u64 = 1; - let e: u64 = 0; - let r: u64 = vcled_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcles_f32() { - let a: f32 = 2.; - let b: f32 = 1.; - let e: u32 = 0; - let r: u32 = vcles_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcled_f64() { - let a: f64 = 2.; - let b: f64 = 1.; - let e: u64 = 0; - let r: u64 = vcled_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x2 = i64x2::new(0, 1); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_u64() { - let a: u64x1 = u64x1::new(1); - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(0, 1); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_f64() { - let a: f64 = 1.2; - let b: f64 = 0.1; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_f64() { - let a: f64x2 = f64x2::new(1.2, 2.3); - let b: f64x2 = f64x2::new(0.1, 1.2); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgez_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u8x8 = 
u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgez_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); - let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgezq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgez_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); - let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgez_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgezq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgez_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vcgez_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); - let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgezq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgez_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vcgez_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, -1); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vcgezq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgez_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgez_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgezq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgez_f64() { - let a: f64 = -1.2; - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vcgez_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcgezq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezd_s64() { - let a: i64 = -1; - let e: u64 = 0; - let r: u64 = vcgezd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezs_f32() { - let a: f32 = -1.; - let e: u32 = 0; - let r: u32 = vcgezs_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgezd_f64() { - let a: f64 = -1.; - let e: u64 = 0; - let r: u64 = vcgezd_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtz_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 
0x04, 0x05); - let e: u8x8 = u8x8::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgtz_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); - let e: u8x16 = u8x16::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtzq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtz_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); - let e: u16x4 = u16x4::new(0, 0, 0, 0xFF_FF); - let r: u16x4 = transmute(vcgtz_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u16x8 = u16x8::new(0, 0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtzq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtz_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vcgtz_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); - let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtzq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtz_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vcgtz_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, -1); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vcgtzq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtz_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vcgtz_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtzq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtz_f64() { - let a: f64 = -1.2; - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vcgtz_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vcgtzq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzd_s64() { - let a: i64 = -1; - let e: u64 = 0; - let r: u64 = vcgtzd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzs_f32() { - let a: f32 = -1.; - let e: u32 = 0; - let r: u32 = vcgtzs_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtzd_f64() { - let a: f64 = -1.; - let e: u64 = 0; - let r: u64 = vcgtzd_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclez_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u8x8 = u8x8::new(0xFF, 
0xFF, 0xFF, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vclez_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vclezq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclez_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0); - let r: u16x4 = transmute(vclez_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vclezq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclez_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclez_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vclezq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclez_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vclez_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, -1); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vclezq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclez_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclez_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); - let r: u32x4 = transmute(vclezq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclez_f64() { - let a: f64 = -1.2; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vclez_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vclezq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezd_s64() { - let a: i64 = 2; - let e: u64 = 0; - let r: u64 = vclezd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezs_f32() { - let a: f32 = 2.; - let e: u32 = 0; - let r: u32 = vclezs_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclezd_f64() { - let a: f64 = 2.; - let e: u64 = 0; - let r: u64 = vclezd_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
unsafe fn test_vcltz_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vcltz_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vcltzq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltz_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0); - let r: u16x4 = transmute(vcltz_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vcltzq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltz_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcltz_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0); - let r: u32x4 = transmute(vcltzq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltz_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcltz_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, -1); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcltzq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltz_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vcltz_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0); - let r: u32x4 = transmute(vcltzq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltz_f64() { - let a: f64 = -1.2; - let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x1 = transmute(vcltz_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let r: u64x2 = transmute(vcltzq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzd_s64() { - let a: i64 = 2; - let e: u64 = 0; - let r: u64 = vcltzd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzs_f32() { - let a: f32 = 2.; - let e: u32 = 0; - let r: u32 = vcltzs_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltzd_f64() { - let a: f64 = 2.; - let e: u64 = 0; - let r: u64 = vcltzd_f64(a); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcagt_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64x1 = u64x1::new(!0); - let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcagtq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let b: f64x2 = f64x2::new(-1.1, 0.0); - let e: u64x2 = u64x2::new(!0, 0); - let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcagts_f32() { - let a: f32 = -1.2; - let b: f32 = -1.1; - let e: u32 = !0; - let r: u32 = vcagts_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcagtd_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64 = !0; - let r: u64 = vcagtd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcage_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64x1 = u64x1::new(!0); - let r: u64x1 = transmute(vcage_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcageq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let b: f64x2 = f64x2::new(-1.1, 0.0); - let e: u64x2 = u64x2::new(!0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcages_f32() { - let a: f32 = -1.2; - let b: f32 = -1.1; - let e: u32 = !0; - let r: u32 = vcages_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaged_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64 = !0; - let r: u64 = vcaged_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcalt_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaltq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let b: f64x2 = f64x2::new(-1.1, 0.0); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcalts_f32() { - let a: f32 = -1.2; - let b: f32 = -1.1; - let e: u32 = 0; - let r: u32 = vcalts_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaltd_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64 = 0; - let r: u64 = vcaltd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcale_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaleq_f64() { - let a: f64x2 = f64x2::new(-1.2, 0.0); - let b: f64x2 = f64x2::new(-1.1, 0.0); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcales_f32() { - let a: f32 = -1.2; - let b: f32 = -1.1; - let e: u32 = 0; - let r: u32 = vcales_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaled_f64() { - let a: f64 = -1.2; - let b: f64 = -1.1; - let e: u64 = 0; - let r: u64 = vcaled_f64(a, b); - assert_eq!(r, e); - } 
- - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); - let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vcopy_lane_s8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vcopyq_laneq_s8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); - let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); - let r: i16x4 = transmute(vcopy_lane_s16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vcopyq_laneq_s16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF); - let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2); - let r: i32x2 = transmute(vcopy_lane_s32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0); - let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4); - let r: i32x4 = transmute(vcopyq_laneq_s32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_s64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2); - let r: i64x2 = transmute(vcopyq_laneq_s64::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); - let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vcopy_lane_u8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: u8x16 = transmute(vcopyq_laneq_u8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); - let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4); - let r: u16x4 = transmute(vcopy_lane_u16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 
6, 7, 8); - let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); - let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vcopyq_laneq_u16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2); - let r: u32x2 = transmute(vcopy_lane_u32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4); - let r: u32x4 = transmute(vcopyq_laneq_u32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 2); - let r: u64x2 = transmute(vcopyq_laneq_u64::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_p8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); - let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vcopy_lane_p8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_p8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vcopyq_laneq_p8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_p16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); - let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); - let r: i16x4 = transmute(vcopy_lane_p16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_p16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vcopyq_laneq_p16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_p64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 2); - let r: i64x2 = transmute(vcopyq_laneq_p64::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_lane_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x2 = f32x2::new(0., 0.5); - let e: f32x2 = f32x2::new(0.5, 2.); - let r: f32x2 = transmute(vcopy_lane_f32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_f32() { - let a: f32x4 = f32x4::new(1., 2., 3., 4.); - let b: f32x4 = f32x4::new(0., 0.5, 0., 0.); - let e: f32x4 = f32x4::new(0.5, 2., 3., 4.); - let r: f32x4 = transmute(vcopyq_laneq_f32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vcopyq_laneq_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64x2 = f64x2::new(0., 0.5); - let e: f64x2 = f64x2::new(0.5, 2.); - let r: f64x2 = transmute(vcopyq_laneq_f64::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vcopy_laneq_s8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); - let r: i16x4 = transmute(vcopy_laneq_s16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0); - let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 2); - let r: i32x2 = transmute(vcopy_laneq_s32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: u8x8 = u8x8::new(0xFF, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vcopy_laneq_u8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x8 = u16x8::new(0, 0xFF_FF, 0, 0, 0, 0, 0, 0); - let e: u16x4 = u16x4::new(0xFF_FF, 2, 3, 4); - let r: u16x4 = transmute(vcopy_laneq_u16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 2); - let r: u32x2 = transmute(vcopy_laneq_u32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_p8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x16 = i8x16::new(0, 0x7F, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: i8x8 = i8x8::new(0x7F, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vcopy_laneq_p8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_p16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x8 = i16x8::new(0, 0x7F_FF, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(0x7F_FF, 2, 3, 4); - let r: i16x4 = transmute(vcopy_laneq_p16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopy_laneq_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x4 = f32x4::new(0., 0.5, 0., 0.); - let e: f32x2 = f32x2::new(0.5, 2.); - let r: f32x2 = transmute(vcopy_laneq_f32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); - let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: 
i8x16 = transmute(vcopyq_lane_s8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); - let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vcopyq_lane_s16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF); - let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 2, 3, 4); - let r: i32x4 = transmute(vcopyq_lane_s32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); - let e: u8x16 = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: u8x16 = transmute(vcopyq_lane_u8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x4 = u16x4::new(0, 0xFF_FF, 0, 0); - let e: u16x8 = u16x8::new(0xFF_FF, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vcopyq_lane_u16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 2, 3, 4); - let r: u32x4 = transmute(vcopyq_lane_u32::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_p8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x8 = i8x8::new(0, 0x7F, 0, 0, 0, 0, 0, 0); - let e: i8x16 = i8x16::new(0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vcopyq_lane_p8::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_p16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x4 = i16x4::new(0, 0x7F_FF, 0, 0); - let e: i16x8 = i16x8::new(0x7F_FF, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vcopyq_lane_p16::<0, 1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_s64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let r: i64x2 = transmute(vcopyq_lane_s64::<1, 0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); - let e: u64x2 = u64x2::new(1, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let r: u64x2 = transmute(vcopyq_lane_u64::<1, 0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_p64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: i64x2 = i64x2::new(1, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let r: i64x2 = transmute(vcopyq_lane_p64::<1, 0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe 
fn test_vcopyq_lane_f32() { - let a: f32x4 = f32x4::new(1., 2., 3., 4.); - let b: f32x2 = f32x2::new(0.5, 0.); - let e: f32x4 = f32x4::new(1., 0.5, 3., 4.); - let r: f32x4 = transmute(vcopyq_lane_f32::<1, 0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcopyq_lane_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64 = 0.5; - let e: f64x2 = f64x2::new(1., 0.5); - let r: f64x2 = transmute(vcopyq_lane_f64::<1, 0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_f64() { - let a: u64 = 0; - let e: f64 = 0.; - let r: f64 = transmute(vcreate_f64(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f64_s64() { - let a: i64x1 = i64x1::new(1); - let e: f64 = 1.; - let r: f64 = transmute(vcvt_f64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f64_s64() { - let a: i64x2 = i64x2::new(1, 2); - let e: f64x2 = f64x2::new(1., 2.); - let r: f64x2 = transmute(vcvtq_f64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f64_u64() { - let a: u64x1 = u64x1::new(1); - let e: f64 = 1.; - let r: f64 = transmute(vcvt_f64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f64_u64() { - let a: u64x2 = u64x2::new(1, 2); - let e: f64x2 = f64x2::new(1., 2.); - let r: f64x2 = transmute(vcvtq_f64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f64_f32() { - let a: f32x2 = f32x2::new(-1.2, 1.2); - let e: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64); - let r: f64x2 = transmute(vcvt_f64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_high_f64_f32() { - let a: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4); - let e: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64); - let r: f64x2 = transmute(vcvt_high_f64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f32_f64() { - let a: f64x2 = f64x2::new(-1.2, 1.2); - let e: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32); - let r: f32x2 = transmute(vcvt_f32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_high_f32_f64() { - let a: f32x2 = f32x2::new(-1.2, 1.2); - let b: f64x2 = f64x2::new(-2.3, 3.4); - let e: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32); - let r: f32x4 = transmute(vcvt_high_f32_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtx_f32_f64() { - let a: f64x2 = f64x2::new(-1.0, 2.0); - let e: f32x2 = f32x2::new(-1.0, 2.0); - let r: f32x2 = transmute(vcvtx_f32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtxd_f32_f64() { - let a: f64 = -1.0; - let e: f32 = -1.0; - let r: f32 = vcvtxd_f32_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtx_high_f32_f64() { - let a: f32x2 = f32x2::new(-1.0, 2.0); - let b: f64x2 = f64x2::new(-3.0, 4.0); - let e: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0); - let r: f32x4 = transmute(vcvtx_high_f32_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f64_s64() { - let a: i64x1 = i64x1::new(1); - let e: f64 = 0.25; - let r: f64 = transmute(vcvt_n_f64_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f64_s64() { - let a: i64x2 = i64x2::new(1, 2); - let e: f64x2 = f64x2::new(0.25, 0.5); - let r: f64x2 = transmute(vcvtq_n_f64_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_f32_s32() { - let a: i32 = 1; - let e: f32 = 0.25; - let r: f32 = vcvts_n_f32_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_f64_s64() { - let a: i64 = 1; - let e: f64 = 0.25; - let r: f64 = vcvtd_n_f64_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f64_u64() { - let a: u64x1 = u64x1::new(1); - let e: f64 = 0.25; - let r: f64 = transmute(vcvt_n_f64_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f64_u64() { - let a: u64x2 = u64x2::new(1, 2); - let e: f64x2 = f64x2::new(0.25, 0.5); - let r: f64x2 = transmute(vcvtq_n_f64_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_f32_u32() { - let a: u32 = 1; - let e: f32 = 0.25; - let r: f32 = vcvts_n_f32_u32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_f64_u64() { - let a: u64 = 1; - let e: f64 = 0.25; - let r: f64 = vcvtd_n_f64_u64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_s64_f64() { - let a: f64 = 0.25; - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vcvt_n_s64_f64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_s64_f64() { - let a: f64x2 = f64x2::new(0.25, 0.5); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vcvtq_n_s64_f64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_s32_f32() { - let a: f32 = 0.25; - let e: i32 = 1; - let r: i32 = vcvts_n_s32_f32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_s64_f64() { - let a: f64 = 0.25; - let e: i64 = 1; - let r: i64 = vcvtd_n_s64_f64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_u64_f64() { - let a: f64 = 0.25; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvt_n_u64_f64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_u64_f64() { - let a: f64x2 = f64x2::new(0.25, 0.5); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtq_n_u64_f64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_n_u32_f32() { - let a: f32 = 0.25; - let e: u32 = 1; - let r: u32 = vcvts_n_u32_f32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_n_u64_f64() { - let a: f64 = 0.25; - let e: u64 = 1; - let r: u64 = vcvtd_n_u64_f64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_f32_s32() { - let a: i32 = 1; - let e: f32 = 1.; - let r: f32 = vcvts_f32_s32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_f64_s64() { - let a: i64 = 1; - let e: f64 = 1.; - let r: f64 = vcvtd_f64_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_f32_u32() { - let a: u32 = 1; - let e: f32 = 1.; - let r: f32 = vcvts_f32_u32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_f64_u64() { - let a: u64 = 1; - let e: f64 = 1.; - let 
r: f64 = vcvtd_f64_u64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_s32_f32() { - let a: f32 = 1.; - let e: i32 = 1; - let r: i32 = vcvts_s32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_s64_f64() { - let a: f64 = 1.; - let e: i64 = 1; - let r: i64 = vcvtd_s64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvts_u32_f32() { - let a: f32 = 1.; - let e: u32 = 1; - let r: u32 = vcvts_u32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtd_u64_f64() { - let a: f64 = 1.; - let e: u64 = 1; - let r: u64 = vcvtd_u64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-1); - let r: i64x1 = transmute(vcvt_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-1, 2); - let r: i64x2 = transmute(vcvtq_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvt_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvta_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-1, 2); - let r: i32x2 = transmute(vcvta_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-1, 2, -3, 4); - let r: i32x4 = transmute(vcvtaq_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvta_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-1); - let r: i64x1 = transmute(vcvta_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-1, 2); - let r: i64x2 = transmute(vcvtaq_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtas_s32_f32() { - let a: f32 = 2.9; - let e: i32 = 3; - let r: i32 = vcvtas_s32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtad_s64_f64() { - let a: f64 = 2.9; - let e: i64 = 3; - let r: i64 = vcvtad_s64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtas_u32_f32() { - let a: f32 = 2.9; - let e: u32 = 3; - let r: u32 = vcvtas_u32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtad_u64_f64() { - let a: f64 = 2.9; - let e: u64 = 3; - let r: u64 = vcvtad_u64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_s32_f32() { - let a: f32x2 = f32x2::new(-1.5, 2.1); - let e: i32x2 = i32x2::new(-2, 2); - let r: i32x2 = transmute(vcvtn_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_s32_f32() { - let a: f32x4 = f32x4::new(-1.5, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-2, 2, -3, 4); - let r: i32x4 = 
transmute(vcvtnq_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_s64_f64() { - let a: f64 = -1.5; - let e: i64x1 = i64x1::new(-2); - let r: i64x1 = transmute(vcvtn_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_s64_f64() { - let a: f64x2 = f64x2::new(-1.5, 2.1); - let e: i64x2 = i64x2::new(-2, 2); - let r: i64x2 = transmute(vcvtnq_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtns_s32_f32() { - let a: f32 = -1.5; - let e: i32 = -2; - let r: i32 = vcvtns_s32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtnd_s64_f64() { - let a: f64 = -1.5; - let e: i64 = -2; - let r: i64 = vcvtnd_s64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-2, 2); - let r: i32x2 = transmute(vcvtm_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-2, 2, -3, 3); - let r: i32x4 = transmute(vcvtmq_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-2); - let r: i64x1 = transmute(vcvtm_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-2, 2); - let r: i64x2 = transmute(vcvtmq_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtms_s32_f32() { - let a: f32 = -1.1; - let e: i32 = -2; - let r: i32 = vcvtms_s32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtmd_s64_f64() { - let a: f64 = -1.1; - let e: i64 = -2; - let r: i64 = vcvtmd_s64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-1, 3); - let r: i32x2 = transmute(vcvtp_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-1, 3, -2, 4); - let r: i32x4 = transmute(vcvtpq_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_s64_f64() { - let a: f64 = -1.1; - let e: i64x1 = i64x1::new(-1); - let r: i64x1 = transmute(vcvtp_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_s64_f64() { - let a: f64x2 = f64x2::new(-1.1, 2.1); - let e: i64x2 = i64x2::new(-1, 3); - let r: i64x2 = transmute(vcvtpq_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtps_s32_f32() { - let a: f32 = -1.1; - let e: i32 = -1; - let r: i32 = vcvtps_s32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtpd_s64_f64() { - let a: f64 = -1.1; - let e: i64 = -1; - let r: i64 = vcvtpd_s64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvta_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvta_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtaq_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvta_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvta_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtaq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtaq_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_u32_f32() { - let a: f32x2 = f32x2::new(1.5, 2.1); - let e: u32x2 = u32x2::new(2, 2); - let r: u32x2 = transmute(vcvtn_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_u32_f32() { - let a: f32x4 = f32x4::new(1.5, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(2, 2, 3, 4); - let r: u32x4 = transmute(vcvtnq_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtn_u64_f64() { - let a: f64 = 1.5; - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vcvtn_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtnq_u64_f64() { - let a: f64x2 = f64x2::new(1.5, 2.1); - let e: u64x2 = u64x2::new(2, 2); - let r: u64x2 = transmute(vcvtnq_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtns_u32_f32() { - let a: f32 = 1.5; - let e: u32 = 2; - let r: u32 = vcvtns_u32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtnd_u64_f64() { - let a: f64 = 1.5; - let e: u64 = 2; - let r: u64 = vcvtnd_u64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvtm_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(1, 2, 2, 3); - let r: u32x4 = transmute(vcvtmq_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtm_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcvtm_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtmq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vcvtmq_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtms_u32_f32() { - let a: f32 = 1.1; - let e: u32 = 1; - let r: u32 = vcvtms_u32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtmd_u64_f64() { - let a: f64 = 1.1; - let e: u64 = 1; - let r: u64 = vcvtmd_u64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(2, 3); - let r: u32x2 = transmute(vcvtp_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(2, 3, 3, 4); - let r: u32x4 = 
transmute(vcvtpq_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtp_u64_f64() { - let a: f64 = 1.1; - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vcvtp_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtpq_u64_f64() { - let a: f64x2 = f64x2::new(1.1, 2.1); - let e: u64x2 = u64x2::new(2, 3); - let r: u64x2 = transmute(vcvtpq_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtps_u32_f32() { - let a: f32 = 1.1; - let e: u32 = 2; - let r: u32 = vcvtps_u32_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtpd_u64_f64() { - let a: f64 = 1.1; - let e: u64 = 2; - let r: u64 = vcvtpd_u64_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_p64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_laneq_p64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_p64() { - let a: i64x1 = i64x1::new(1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_lane_p64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let e: f64x2 = f64x2::new(1., 1.); - let r: f64x2 = transmute(vdupq_laneq_f64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_f64() { - let a: f64 = 1.; - let e: f64x2 = f64x2::new(1., 1.); - let r: f64x2 = transmute(vdupq_lane_f64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_p64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vdup_lane_p64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_f64() { - let a: f64 = 0.; - let e: f64 = 0.; - let r: f64 = transmute(vdup_lane_f64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vdup_laneq_p64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_f64() { - let a: f64x2 = f64x2::new(0., 1.); - let e: f64 = 1.; - let r: f64 = transmute(vdup_laneq_f64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupb_lane_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8 = 1; - let r: i8 = vdupb_lane_s8::<4>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupb_laneq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8 = 1; - let r: i8 = vdupb_laneq_s8::<8>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vduph_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16 = 1; - let r: i16 = vduph_lane_s16::<2>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vduph_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16 = 1; - let r: i16 = vduph_laneq_s16::<4>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdups_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let 
e: i32 = 1; - let r: i32 = vdups_lane_s32::<1>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdups_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 4); - let e: i32 = 1; - let r: i32 = vdups_laneq_s32::<2>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupd_lane_s64() { - let a: i64x1 = i64x1::new(1); - let e: i64 = 1; - let r: i64 = vdupd_lane_s64::<0>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupd_laneq_s64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64 = 1; - let r: i64 = vdupd_laneq_s64::<1>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupb_lane_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u8 = 1; - let r: u8 = vdupb_lane_u8::<4>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupb_laneq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: u8 = 1; - let r: u8 = vdupb_laneq_u8::<8>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vduph_lane_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 4); - let e: u16 = 1; - let r: u16 = vduph_lane_u16::<2>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vduph_laneq_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u16 = 1; - let r: u16 = vduph_laneq_u16::<4>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdups_lane_u32() { - let a: u32x2 = u32x2::new(1, 1); - let e: u32 = 1; - let r: u32 = vdups_lane_u32::<1>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdups_laneq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 4); - let e: u32 = 1; - let r: u32 = vdups_laneq_u32::<2>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupd_lane_u64() { - let a: u64x1 = u64x1::new(1); - let e: u64 = 1; - let r: u64 = vdupd_lane_u64::<0>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupd_laneq_u64() { - let a: u64x2 = u64x2::new(1, 1); - let e: u64 = 1; - let r: u64 = vdupd_laneq_u64::<1>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupb_lane_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: p8 = 1; - let r: p8 = vdupb_lane_p8::<4>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupb_laneq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: p8 = 1; - let r: p8 = vdupb_laneq_p8::<8>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vduph_lane_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: p16 = 1; - let r: p16 = vduph_lane_p16::<2>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vduph_laneq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: p16 = 1; - let r: p16 = vduph_laneq_p16::<4>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdups_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32 = 1.; - let r: f32 = vdups_lane_f32::<1>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdups_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32 
= 1.; - let r: f32 = vdups_laneq_f32::<2>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupd_lane_f64() { - let a: f64 = 1.; - let e: f64 = 1.; - let r: f64 = vdupd_lane_f64::<0>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupd_laneq_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let e: f64 = 1.; - let r: f64 = vdupd_laneq_f64::<1>(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_p64() { - let a: i64x2 = i64x2::new(1, 1); - let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vextq_p64::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let b: f64x2 = f64x2::new(2., 2.); - let e: f64x2 = f64x2::new(1., 2.); - let r: f64x2 = transmute(vextq_f64::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_f64() { - let a: f64 = 0.; - let b: f64 = 2.; - let c: f64 = 3.; - let e: f64 = 6.; - let r: f64 = transmute(vmla_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_f64() { - let a: f64x2 = f64x2::new(0., 1.); - let b: f64x2 = f64x2::new(2., 2.); - let c: f64x2 = f64x2::new(3., 3.); - let e: f64x2 = f64x2::new(6., 7.); - let r: f64x2 = transmute(vmlaq_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_s8() { - let a: i16x8 = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r: i16x8 = transmute(vmlal_high_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_s16() { - let a: i32x4 = i32x4::new(8, 7, 6, 5); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let e: i32x4 = i32x4::new(8, 9, 10, 11); - let r: i32x4 = transmute(vmlal_high_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_s32() { - let a: i64x2 = i64x2::new(8, 7); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(3, 3, 0, 1); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vmlal_high_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_u8() { - let a: u16x8 = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r: u16x8 = transmute(vmlal_high_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_u16() { - let a: u32x4 = u32x4::new(8, 7, 6, 5); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let e: u32x4 = u32x4::new(8, 9, 10, 11); - let r: u32x4 = transmute(vmlal_high_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = 
"neon")] - unsafe fn test_vmlal_high_u32() { - let a: u64x2 = u64x2::new(8, 7); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(3, 3, 0, 1); - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vmlal_high_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_s16() { - let a: i32x4 = i32x4::new(8, 7, 6, 5); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16 = 2; - let e: i32x4 = i32x4::new(8, 9, 10, 11); - let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_s32() { - let a: i64x2 = i64x2::new(8, 7); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32 = 2; - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_u16() { - let a: u32x4 = u32x4::new(8, 7, 6, 5); - let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: u16 = 2; - let e: u32x4 = u32x4::new(8, 9, 10, 11); - let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_n_u32() { - let a: u64x2 = u64x2::new(8, 7); - let b: u32x4 = u32x4::new(3, 3, 0, 1); - let c: u32 = 2; - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_lane_s16() { - let a: i32x4 = i32x4::new(8, 7, 6, 5); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i32x4 = i32x4::new(8, 9, 10, 11); - let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_laneq_s16() { - let a: i32x4 = i32x4::new(8, 7, 6, 5); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(8, 9, 10, 11); - let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_lane_s32() { - let a: i64x2 = i64x2::new(8, 7); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32x2 = i32x2::new(0, 2); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_laneq_s32() { - let a: i64x2 = i64x2::new(8, 7); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_lane_u16() { - let a: u32x4 = u32x4::new(8, 7, 6, 5); - let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: u16x4 = u16x4::new(0, 2, 0, 0); - let e: u32x4 = u32x4::new(8, 9, 10, 11); - let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_laneq_u16() { - let a: u32x4 = u32x4::new(8, 7, 6, 5); - let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 
2, 3); - let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(8, 9, 10, 11); - let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_lane_u32() { - let a: u64x2 = u64x2::new(8, 7); - let b: u32x4 = u32x4::new(3, 3, 0, 1); - let c: u32x2 = u32x2::new(0, 2); - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_high_laneq_u32() { - let a: u64x2 = u64x2::new(8, 7); - let b: u32x4 = u32x4::new(3, 3, 0, 1); - let c: u32x4 = u32x4::new(0, 2, 0, 0); - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_f64() { - let a: f64 = 6.; - let b: f64 = 2.; - let c: f64 = 3.; - let e: f64 = 0.; - let r: f64 = transmute(vmls_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_f64() { - let a: f64x2 = f64x2::new(6., 7.); - let b: f64x2 = f64x2::new(2., 2.); - let c: f64x2 = f64x2::new(3., 3.); - let e: f64x2 = f64x2::new(0., 1.); - let r: f64x2 = transmute(vmlsq_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_s8() { - let a: i16x8 = i16x8::new(14, 15, 16, 17, 18, 19, 20, 21); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(14, 13, 12, 11, 10, 9, 8, 7); - let r: i16x8 = transmute(vmlsl_high_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_s16() { - let a: i32x4 = i32x4::new(14, 15, 16, 17); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let e: i32x4 = i32x4::new(14, 13, 12, 11); - let r: i32x4 = transmute(vmlsl_high_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_s32() { - let a: i64x2 = i64x2::new(14, 15); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(3, 3, 0, 1); - let e: i64x2 = i64x2::new(14, 13); - let r: i64x2 = transmute(vmlsl_high_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_u8() { - let a: u16x8 = u16x8::new(14, 15, 16, 17, 18, 19, 20, 21); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(14, 13, 12, 11, 10, 9, 8, 7); - let r: u16x8 = transmute(vmlsl_high_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_u16() { - let a: u32x4 = u32x4::new(14, 15, 16, 17); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let e: u32x4 = u32x4::new(14, 13, 12, 11); - let r: u32x4 = transmute(vmlsl_high_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_u32() { - let a: u64x2 = 
u64x2::new(14, 15); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(3, 3, 0, 1); - let e: u64x2 = u64x2::new(14, 13); - let r: u64x2 = transmute(vmlsl_high_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_n_s16() { - let a: i32x4 = i32x4::new(14, 15, 16, 17); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16 = 2; - let e: i32x4 = i32x4::new(14, 13, 12, 11); - let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_n_s32() { - let a: i64x2 = i64x2::new(14, 15); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32 = 2; - let e: i64x2 = i64x2::new(14, 13); - let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_n_u16() { - let a: u32x4 = u32x4::new(14, 15, 16, 17); - let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: u16 = 2; - let e: u32x4 = u32x4::new(14, 13, 12, 11); - let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_n_u32() { - let a: u64x2 = u64x2::new(14, 15); - let b: u32x4 = u32x4::new(3, 3, 0, 1); - let c: u32 = 2; - let e: u64x2 = u64x2::new(14, 13); - let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_lane_s16() { - let a: i32x4 = i32x4::new(14, 15, 16, 17); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i32x4 = i32x4::new(14, 13, 12, 11); - let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_laneq_s16() { - let a: i32x4 = i32x4::new(14, 15, 16, 17); - let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(14, 13, 12, 11); - let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_lane_s32() { - let a: i64x2 = i64x2::new(14, 15); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32x2 = i32x2::new(0, 2); - let e: i64x2 = i64x2::new(14, 13); - let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_laneq_s32() { - let a: i64x2 = i64x2::new(14, 15); - let b: i32x4 = i32x4::new(3, 3, 0, 1); - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i64x2 = i64x2::new(14, 13); - let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_lane_u16() { - let a: u32x4 = u32x4::new(14, 15, 16, 17); - let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - let c: u16x4 = u16x4::new(0, 2, 0, 0); - let e: u32x4 = u32x4::new(14, 13, 12, 11); - let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_laneq_u16() { - let a: u32x4 = u32x4::new(14, 15, 16, 17); - let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3); - 
let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(14, 13, 12, 11); - let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_lane_u32() { - let a: u64x2 = u64x2::new(14, 15); - let b: u32x4 = u32x4::new(3, 3, 0, 1); - let c: u32x2 = u32x2::new(0, 2); - let e: u64x2 = u64x2::new(14, 13); - let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_high_laneq_u32() { - let a: u64x2 = u64x2::new(14, 15); - let b: u32x4 = u32x4::new(3, 3, 0, 1); - let c: u32x4 = u32x4::new(0, 2, 0, 0); - let e: u64x2 = u64x2::new(14, 13); - let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_high_s16() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5); - let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15); - let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_high_s32() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 3, 4, 5); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5); - let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_high_s64() { - let a: i32x2 = i32x2::new(0, 1); - let b: i64x2 = i64x2::new(2, 3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_high_u16() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5); - let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15); - let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_high_u32() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(2, 3, 4, 5); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5); - let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_high_u64() { - let a: u32x2 = u32x2::new(0, 1); - let b: u64x2 = u64x2::new(2, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vneg_s64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vneg_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x2 = i64x2::new(0, -1); - let r: i64x2 = transmute(vnegq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vnegd_s64() { - let a: i64 = 1; - let e: i64 = -1; - let r: i64 = vnegd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vneg_f64() { - let a: f64 = 0.; - let e: f64 = 0.; - let r: f64 = transmute(vneg_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = 
"neon")] - unsafe fn test_vnegq_f64() { - let a: f64x2 = f64x2::new(0., 1.); - let e: f64x2 = f64x2::new(0., -1.); - let r: f64x2 = transmute(vnegq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqneg_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); - let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); - let r: i64x1 = transmute(vqneg_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0); - let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0); - let r: i64x2 = transmute(vqnegq_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegb_s8() { - let a: i8 = 1; - let e: i8 = -1; - let r: i8 = vqnegb_s8(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegh_s16() { - let a: i16 = 1; - let e: i16 = -1; - let r: i16 = vqnegh_s16(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegs_s32() { - let a: i32 = 1; - let e: i32 = -1; - let r: i32 = vqnegs_s32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegd_s64() { - let a: i64 = 1; - let e: i64 = -1; - let r: i64 = vqnegd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubb_s8() { - let a: i8 = 42; - let b: i8 = 1; - let e: i8 = 41; - let r: i8 = vqsubb_s8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubh_s16() { - let a: i16 = 42; - let b: i16 = 1; - let e: i16 = 41; - let r: i16 = vqsubh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubb_u8() { - let a: u8 = 42; - let b: u8 = 1; - let e: u8 = 41; - let r: u8 = vqsubb_u8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubh_u16() { - let a: u16 = 42; - let b: u16 = 1; - let e: u16 = 41; - let r: u16 = vqsubh_u16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubs_u32() { - let a: u32 = 42; - let b: u32 = 1; - let e: u32 = 41; - let r: u32 = vqsubs_u32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubd_u64() { - let a: u64 = 42; - let b: u64 = 1; - let e: u64 = 41; - let r: u64 = vqsubd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubs_s32() { - let a: i32 = 42; - let b: i32 = 1; - let e: i32 = 41; - let r: i32 = vqsubs_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubd_s64() { - let a: i64 = 42; - let b: i64 = 1; - let e: i64 = 41; - let r: i64 = vqsubd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrbit_s8() { - let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112); - let r: i8x8 = transmute(vrbit_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrbitq_s8() { - let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120); - let r: i8x16 = transmute(vrbitq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrbit_u8() { - let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112); - let r: u8x8 = transmute(vrbit_u8(transmute(a))); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrbitq_u8() { - let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120); - let r: u8x16 = transmute(vrbitq_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrbit_p8() { - let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112); - let r: i8x8 = transmute(vrbit_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrbitq_p8() { - let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120); - let r: i8x16 = transmute(vrbitq_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndx_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 0.0); - let r: f32x2 = transmute(vrndx_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndxq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); - let r: f32x4 = transmute(vrndxq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndx_f64() { - let a: f64 = -1.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrndx_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndxq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-2.0, 0.0); - let r: f64x2 = transmute(vrndxq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrnda_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 1.0); - let r: f32x2 = transmute(vrnda_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndaq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0); - let r: f32x4 = transmute(vrndaq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrnda_f64() { - let a: f64 = -1.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrnda_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndaq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-2.0, 1.0); - let r: f64x2 = transmute(vrndaq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndn_f64() { - let a: f64 = -1.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrndn_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndnq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-2.0, 0.0); - let r: f64x2 = transmute(vrndnq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndns_f32() { - let a: f32 = -1.5; - let e: f32 = -2.0; - let r: f32 = vrndns_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndm_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 0.0); - let r: f32x2 = transmute(vrndm_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndmq_f32() { - let a: f32x4 = 
f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0); - let r: f32x4 = transmute(vrndmq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndm_f64() { - let a: f64 = -1.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrndm_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndmq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-2.0, 0.0); - let r: f64x2 = transmute(vrndmq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndp_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-1.0, 1.0); - let r: f32x2 = transmute(vrndp_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndpq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0); - let r: f32x4 = transmute(vrndpq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndp_f64() { - let a: f64 = -1.5; - let e: f64 = -1.0; - let r: f64 = transmute(vrndp_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndpq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-1.0, 1.0); - let r: f64x2 = transmute(vrndpq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrnd_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-1.0, 0.0); - let r: f32x2 = transmute(vrnd_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0); - let r: f32x4 = transmute(vrndq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrnd_f64() { - let a: f64 = -1.5; - let e: f64 = -1.0; - let r: f64 = transmute(vrnd_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-1.0, 0.0); - let r: f64x2 = transmute(vrndq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndi_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 0.0); - let r: f32x2 = transmute(vrndi_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndiq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); - let r: f32x4 = transmute(vrndiq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndi_f64() { - let a: f64 = -1.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrndi_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndiq_f64() { - let a: f64x2 = f64x2::new(-1.5, 0.5); - let e: f64x2 = f64x2::new(-2.0, 0.0); - let r: f64x2 = transmute(vrndiq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddb_s8() { - let a: i8 = 42; - let b: i8 = 1; - let e: i8 = 43; - let r: i8 = vqaddb_s8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddh_s16() { - let a: i16 = 42; - let b: i16 = 1; - let e: i16 = 43; - let r: i16 = vqaddh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = 
"neon")] - unsafe fn test_vqaddb_u8() { - let a: u8 = 42; - let b: u8 = 1; - let e: u8 = 43; - let r: u8 = vqaddb_u8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddh_u16() { - let a: u16 = 42; - let b: u16 = 1; - let e: u16 = 43; - let r: u16 = vqaddh_u16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadds_u32() { - let a: u32 = 42; - let b: u32 = 1; - let e: u32 = 43; - let r: u32 = vqadds_u32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddd_u64() { - let a: u64 = 42; - let b: u64 = 1; - let e: u64 = 43; - let r: u64 = vqaddd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadds_s32() { - let a: i32 = 42; - let b: i32 = 1; - let e: i32 = 43; - let r: i32 = vqadds_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddd_s64() { - let a: i64 = 42; - let b: i64 = 1; - let e: i64 = 43; - let r: i64 = vqaddd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_f64_x2() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 2.]; - let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_f64_x2() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)]; - let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_f64_x3() { - let a: [f64; 4] = [0., 1., 2., 3.]; - let e: [f64; 3] = [1., 2., 3.]; - let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_f64_x3() { - let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.]; - let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)]; - let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_f64_x4() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let e: [f64; 4] = [1., 2., 3., 4.]; - let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_f64_x4() { - let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; - let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)]; - let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 3]; - let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)]; - let r: [i64x2; 2] = transmute(vld2q_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 3)]; - let r: [u64x2; 2] = transmute(vld2q_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 3)]; - let r: [i64x2; 2] = transmute(vld2q_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 2.]; - let r: [f64; 2] = transmute(vld2_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vld2q_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 3.]; - let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 3.)]; - let r: [f64x2; 2] = transmute(vld2q_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_s64() { - let a: [i64; 5] = [0, 1, 1, 2, 3]; - let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 2] = transmute(vld2q_dup_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_u64() { - let a: [u64; 5] = [0, 1, 1, 2, 3]; - let e: [u64x2; 2] = [u64x2::new(1, 1), u64x2::new(1, 1)]; - let r: [u64x2; 2] = transmute(vld2q_dup_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_p64() { - let a: [u64; 5] = [0, 1, 1, 2, 3]; - let e: [i64x2; 2] = [i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 2] = transmute(vld2q_dup_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_f64() { - let a: [f64; 3] = [0., 1., 1.]; - let e: [f64; 2] = [1., 1.]; - let r: [f64; 2] = transmute(vld2_dup_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_f64() { - let a: [f64; 5] = [0., 1., 1., 2., 3.]; - let e: [f64x2; 2] = [f64x2::new(1., 1.), f64x2::new(1., 1.)]; - let r: [f64x2; 2] = transmute(vld2q_dup_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_s8() { - let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i8x16; 2] = transmute(vld2q_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_s64() { - let a: [i64; 3] = [0, 1, 2]; - let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; - let r: [i64x1; 2] = transmute(vld2_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_s64() { - let a: [i64; 5] = [0, 1, 2, 3, 4]; - let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)]; - let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)]; - let r: [i64x2; 2] = transmute(vld2q_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_p64() { - let a: [u64; 3] = [0, 1, 2]; - let b: [i64x1; 2] = [i64x1::new(0), i64x1::new(2)]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; - let r: [i64x1; 2] = transmute(vld2_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_p64() { - let a: [u64; 5] = [0, 1, 2, 3, 4]; - let b: [i64x2; 2] = [i64x2::new(0, 2), i64x2::new(2, 14)]; - let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(2, 14)]; - let r: [i64x2; 2] = transmute(vld2q_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_u8() { - let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 
6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u8x16; 2] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)]; - let e: [u8x16; 2] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)]; - let r: [u8x16; 2] = transmute(vld2q_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_u64() { - let a: [u64; 3] = [0, 1, 2]; - let b: [u64x1; 2] = [u64x1::new(0), u64x1::new(2)]; - let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)]; - let r: [u64x1; 2] = transmute(vld2_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_u64() { - let a: [u64; 5] = [0, 1, 2, 3, 4]; - let b: [u64x2; 2] = [u64x2::new(0, 2), u64x2::new(2, 14)]; - let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(2, 14)]; - let r: [u64x2; 2] = transmute(vld2q_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_p8() { - let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x16; 2] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i8x16; 2] = transmute(vld2q_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let b: [f64; 2] = [0., 2.]; - let e: [f64; 2] = [1., 2.]; - let r: [f64; 2] = transmute(vld2_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_f64() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let b: [f64x2; 2] = [f64x2::new(0., 2.), f64x2::new(2., 14.)]; - let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(2., 14.)]; - let r: [f64x2; 2] = transmute(vld2q_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_s64() { - let a: [i64; 7] = [0, 1, 2, 2, 2, 4, 4]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)]; - let r: [i64x2; 3] = transmute(vld3q_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_u64() { - let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4]; - let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 4), u64x2::new(2, 4)]; - let r: [u64x2; 3] = transmute(vld3q_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_p64() { - let a: [u64; 7] = [0, 1, 2, 2, 2, 4, 4]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 4), i64x2::new(2, 4)]; - let r: [i64x2; 3] = transmute(vld3q_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_f64() { - let a: [f64; 4] = [0., 1., 2., 2.]; - let e: [f64; 3] = [1., 2., 2.]; - let r: [f64; 3] = transmute(vld3_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_f64() { - let a: [f64; 
7] = [0., 1., 2., 2., 2., 4., 4.]; - let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 4.), f64x2::new(2., 4.)]; - let r: [f64x2; 3] = transmute(vld3q_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_s64() { - let a: [i64; 7] = [0, 1, 1, 1, 3, 1, 4]; - let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 3] = transmute(vld3q_dup_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_u64() { - let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4]; - let e: [u64x2; 3] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)]; - let r: [u64x2; 3] = transmute(vld3q_dup_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_p64() { - let a: [u64; 7] = [0, 1, 1, 1, 3, 1, 4]; - let e: [i64x2; 3] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 3] = transmute(vld3q_dup_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_f64() { - let a: [f64; 4] = [0., 1., 1., 1.]; - let e: [f64; 3] = [1., 1., 1.]; - let r: [f64; 3] = transmute(vld3_dup_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_f64() { - let a: [f64; 7] = [0., 1., 1., 1., 3., 1., 4.]; - let e: [f64x2; 3] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)]; - let r: [f64x2; 3] = transmute(vld3q_dup_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_s8() { - let a: [i8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let r: [i8x16; 3] = transmute(vld3q_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_s64() { - let a: [i64; 4] = [0, 1, 2, 2]; - let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 3] = transmute(vld3_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_s64() { - let a: [i64; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let r: [i64x2; 3] = transmute(vld3q_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_p64() { - let a: [u64; 4] = [0, 1, 2, 2]; - let b: [i64x1; 3] = [i64x1::new(0), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 3] = transmute(vld3_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_p64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 
6]; - let b: [i64x2; 3] = [i64x2::new(0, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(2, 14), i64x2::new(2, 16)]; - let r: [i64x2; 3] = transmute(vld3q_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_p8() { - let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x16; 3] = [i8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let r: [i8x16; 3] = transmute(vld3q_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_u8() { - let a: [u8; 49] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u8x16; 3] = [u8x16::new(0, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let e: [u8x16; 3] = [u8x16::new(1, 2, 2, 14, 2, 16, 17, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)]; - let r: [u8x16; 3] = transmute(vld3q_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_u64() { - let a: [u64; 4] = [0, 1, 2, 2]; - let b: [u64x1; 3] = [u64x1::new(0), u64x1::new(2), u64x1::new(2)]; - let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(2)]; - let r: [u64x1; 3] = transmute(vld3_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_u64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [u64x2; 3] = [u64x2::new(0, 2), u64x2::new(2, 14), u64x2::new(2, 16)]; - let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(2, 14), u64x2::new(2, 16)]; - let r: [u64x2; 3] = transmute(vld3q_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_f64() { - let a: [f64; 4] = [0., 1., 2., 2.]; - let b: [f64; 3] = [0., 2., 2.]; - let e: [f64; 3] = [1., 2., 2.]; - let r: [f64; 3] = transmute(vld3_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_f64() { - let a: [f64; 7] = [0., 1., 2., 2., 4., 5., 6.]; - let b: [f64x2; 3] = [f64x2::new(0., 2.), f64x2::new(2., 14.), f64x2::new(9., 16.)]; - let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(2., 14.), f64x2::new(2., 16.)]; - let r: [f64x2; 3] = transmute(vld3q_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_s64() { - let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)]; - let r: [i64x2; 4] = 
transmute(vld4q_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_u64() { - let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 6), u64x2::new(2, 6), u64x2::new(6, 8)]; - let r: [u64x2; 4] = transmute(vld4q_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_p64() { - let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 6), i64x2::new(2, 6), i64x2::new(6, 8)]; - let r: [i64x2; 4] = transmute(vld4q_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 6.]; - let e: [f64; 4] = [1., 2., 2., 6.]; - let r: [f64; 4] = transmute(vld4_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_f64() { - let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; - let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 6.), f64x2::new(2., 6.), f64x2::new(6., 8.)]; - let r: [f64x2; 4] = transmute(vld4q_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_s64() { - let a: [i64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 4] = transmute(vld4q_dup_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_u64() { - let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [u64x2; 4] = [u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1), u64x2::new(1, 1)]; - let r: [u64x2; 4] = transmute(vld4q_dup_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_p64() { - let a: [u64; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [i64x2; 4] = [i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1), i64x2::new(1, 1)]; - let r: [i64x2; 4] = transmute(vld4q_dup_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_f64() { - let a: [f64; 5] = [0., 1., 1., 1., 1.]; - let e: [f64; 4] = [1., 1., 1., 1.]; - let r: [f64; 4] = transmute(vld4_dup_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_f64() { - let a: [f64; 9] = [0., 1., 1., 1., 1., 6., 4., 3., 5.]; - let e: [f64x2; 4] = [f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.), f64x2::new(1., 1.)]; - let r: [f64x2; 4] = transmute(vld4q_dup_f64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_s8() { - let a: [i8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16]; - let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i8x16; 4] = 
transmute(vld4q_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 2]; - let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 4] = transmute(vld4_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_s64() { - let a: [i64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; - let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let r: [i64x2; 4] = transmute(vld4q_lane_s64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 2]; - let b: [i64x1; 4] = [i64x1::new(0), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 4] = transmute(vld4_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_p64() { - let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; - let b: [i64x2; 4] = [i64x2::new(0, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(2, 2), i64x2::new(2, 16), i64x2::new(2, 18)]; - let r: [i64x2; 4] = transmute(vld4q_lane_p64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_p8() { - let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16]; - let b: [i8x16; 4] = [i8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), i8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), i8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), i8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i8x16; 4] = transmute(vld4q_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_u8() { - let a: [u8; 65] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16]; - let b: [u8x16; 4] = [u8x16::new(0, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(11, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let e: [u8x16; 4] = [u8x16::new(1, 2, 2, 2, 2, 16, 2, 18, 2, 20, 21, 22, 2, 24, 25, 26), u8x16::new(2, 12, 13, 14, 15, 16, 2, 18, 2, 20, 21, 22, 23, 24, 25, 26), u8x16::new(2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8), u8x16::new(2, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16)]; - let r: 
[u8x16; 4] = transmute(vld4q_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 2]; - let b: [u64x1; 4] = [u64x1::new(0), u64x1::new(2), u64x1::new(2), u64x1::new(2)]; - let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(2), u64x1::new(2)]; - let r: [u64x1; 4] = transmute(vld4_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_u64() { - let a: [u64; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; - let b: [u64x2; 4] = [u64x2::new(0, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)]; - let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(2, 2), u64x2::new(2, 16), u64x2::new(2, 18)]; - let r: [u64x2; 4] = transmute(vld4q_lane_u64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 2.]; - let b: [f64; 4] = [0., 2., 2., 2.]; - let e: [f64; 4] = [1., 2., 2., 2.]; - let r: [f64; 4] = transmute(vld4_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_f64() { - let a: [f64; 9] = [0., 1., 2., 2., 2., 5., 6., 7., 8.]; - let b: [f64x2; 4] = [f64x2::new(0., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)]; - let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(2., 2.), f64x2::new(2., 16.), f64x2::new(2., 18.)]; - let r: [f64x2; 4] = transmute(vld4q_lane_f64::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1_lane_f64() { - let a: [f64; 2] = [0., 1.]; - let e: [f64; 1] = [1.]; - let mut r: [f64; 1] = [0f64; 1]; - vst1_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1q_lane_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 0.]; - let mut r: [f64; 2] = [0f64; 2]; - vst1q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1_f64_x2() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 2.]; - let mut r: [f64; 2] = [0f64; 2]; - vst1_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1q_f64_x2() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let e: [f64; 4] = [1., 2., 3., 4.]; - let mut r: [f64; 4] = [0f64; 4]; - vst1q_f64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1_f64_x3() { - let a: [f64; 4] = [0., 1., 2., 3.]; - let e: [f64; 3] = [1., 2., 3.]; - let mut r: [f64; 3] = [0f64; 3]; - vst1_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1q_f64_x3() { - let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.]; - let e: [f64; 6] = [1., 2., 3., 4., 5., 6.]; - let mut r: [f64; 6] = [0f64; 6]; - vst1q_f64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1_f64_x4() { - let a: [f64; 5] = [0., 1., 2., 3., 4.]; - let e: [f64; 4] = [1., 2., 3., 4.]; - let mut r: [f64; 4] = [0f64; 4]; - vst1_f64_x4(r.as_mut_ptr(), 
core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst1q_f64_x4() { - let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; - let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; - let mut r: [f64; 8] = [0f64; 8]; - vst1q_f64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 3]; - let e: [i64; 4] = [1, 2, 2, 3]; - let mut r: [i64; 4] = [0i64; 4]; - vst2q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64; 4] = [1, 2, 2, 3]; - let mut r: [u64; 4] = [0u64; 4]; - vst2q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64; 4] = [1, 2, 2, 3]; - let mut r: [u64; 4] = [0u64; 4]; - vst2q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 2.]; - let mut r: [f64; 2] = [0f64; 2]; - vst2_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 3.]; - let e: [f64; 4] = [1., 2., 2., 3.]; - let mut r: [f64; 4] = [0f64; 4]; - vst2q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_s8() { - let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; - let e: [i8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [i8; 32] = [0i8; 32]; - vst2q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2_lane_s64() { - let a: [i64; 3] = [0, 1, 2]; - let e: [i64; 2] = [1, 2]; - let mut r: [i64; 2] = [0i64; 2]; - vst2_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 3]; - let e: [i64; 4] = [1, 2, 0, 0]; - let mut r: [i64; 4] = [0i64; 4]; - vst2q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_u8() { - let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; - let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 32] = [0u8; 32]; - vst2q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2_lane_u64() { - let a: [u64; 3] = [0, 1, 2]; - let e: [u64; 2] = [1, 2]; - let mut r: [u64; 2] = [0u64; 2]; - vst2_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
unsafe fn test_vst2q_lane_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64; 4] = [1, 2, 0, 0]; - let mut r: [u64; 4] = [0u64; 4]; - vst2q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_p8() { - let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; - let e: [u8; 32] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 32] = [0u8; 32]; - vst2q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2_lane_p64() { - let a: [u64; 3] = [0, 1, 2]; - let e: [u64; 2] = [1, 2]; - let mut r: [u64; 2] = [0u64; 2]; - vst2_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 3]; - let e: [u64; 4] = [1, 2, 0, 0]; - let mut r: [u64; 4] = [0u64; 4]; - vst2q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2_lane_f64() { - let a: [f64; 3] = [0., 1., 2.]; - let e: [f64; 2] = [1., 2.]; - let mut r: [f64; 2] = [0f64; 2]; - vst2_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst2q_lane_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 3.]; - let e: [f64; 4] = [1., 2., 0., 0.]; - let mut r: [f64; 4] = [0f64; 4]; - vst2q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_s64() { - let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4]; - let e: [i64; 6] = [1, 2, 2, 2, 4, 4]; - let mut r: [i64; 6] = [0i64; 6]; - vst3q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_u64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4]; - let e: [u64; 6] = [1, 2, 2, 2, 4, 4]; - let mut r: [u64; 6] = [0u64; 6]; - vst3q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_p64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4]; - let e: [u64; 6] = [1, 2, 2, 2, 4, 4]; - let mut r: [u64; 6] = [0u64; 6]; - vst3q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3_f64() { - let a: [f64; 4] = [0., 1., 2., 2.]; - let e: [f64; 3] = [1., 2., 2.]; - let mut r: [f64; 3] = [0f64; 3]; - vst3_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_f64() { - let a: [f64; 7] = [0., 1., 2., 2., 4., 2., 4.]; - let e: [f64; 6] = [1., 2., 2., 2., 4., 4.]; - let mut r: [f64; 6] = [0f64; 6]; - vst3q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_s8() { - let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; - let e: 
[i8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [i8; 48] = [0i8; 48]; - vst3q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3_lane_s64() { - let a: [i64; 4] = [0, 1, 2, 2]; - let e: [i64; 3] = [1, 2, 2]; - let mut r: [i64; 3] = [0i64; 3]; - vst3_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_s64() { - let a: [i64; 7] = [0, 1, 2, 2, 4, 2, 4]; - let e: [i64; 6] = [1, 2, 2, 0, 0, 0]; - let mut r: [i64; 6] = [0i64; 6]; - vst3q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_u8() { - let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; - let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 48] = [0u8; 48]; - vst3q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3_lane_u64() { - let a: [u64; 4] = [0, 1, 2, 2]; - let e: [u64; 3] = [1, 2, 2]; - let mut r: [u64; 3] = [0u64; 3]; - vst3_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_u64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4]; - let e: [u64; 6] = [1, 2, 2, 0, 0, 0]; - let mut r: [u64; 6] = [0u64; 6]; - vst3q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_p8() { - let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; - let e: [u8; 48] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 48] = [0u8; 48]; - vst3q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3_lane_p64() { - let a: [u64; 4] = [0, 1, 2, 2]; - let e: [u64; 3] = [1, 2, 2]; - let mut r: [u64; 3] = [0u64; 3]; - vst3_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_p64() { - let a: [u64; 7] = [0, 1, 2, 2, 4, 2, 4]; - let e: [u64; 6] = [1, 2, 2, 0, 0, 0]; - let mut r: [u64; 6] = [0u64; 6]; - vst3q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3_lane_f64() { - let a: [f64; 4] = [0., 1., 2., 2.]; - let e: [f64; 3] = [1., 2., 2.]; - let mut r: [f64; 3] = [0f64; 3]; - vst3_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst3q_lane_f64() 
{ - let a: [f64; 7] = [0., 1., 2., 2., 3., 2., 3.]; - let e: [f64; 6] = [1., 2., 2., 0., 0., 0.]; - let mut r: [f64; 6] = [0f64; 6]; - vst3q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_s64() { - let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i64; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; - let mut r: [i64; 8] = [0i64; 8]; - vst4q_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_u64() { - let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; - let mut r: [u64; 8] = [0u64; 8]; - vst4q_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_p64() { - let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u64; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; - let mut r: [u64; 8] = [0u64; 8]; - vst4q_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 6.]; - let e: [f64; 4] = [1., 2., 2., 6.]; - let mut r: [f64; 4] = [0f64; 4]; - vst4_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_f64() { - let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; - let e: [f64; 8] = [1., 2., 2., 6., 2., 6., 6., 8.]; - let mut r: [f64; 8] = [0f64; 8]; - vst4q_f64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_s8() { - let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; - let e: [i8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [i8; 64] = [0i8; 64]; - vst4q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4_lane_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 6]; - let e: [i64; 4] = [1, 2, 2, 6]; - let mut r: [i64; 4] = [0i64; 4]; - vst4_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_s64() { - let a: [i64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i64; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; - let mut r: [i64; 8] = [0i64; 8]; - vst4q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_u8() { - let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; - let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 64] = [0u8; 64]; 
- vst4q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4_lane_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 6]; - let e: [u64; 4] = [1, 2, 2, 6]; - let mut r: [u64; 4] = [0u64; 4]; - vst4_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_u64() { - let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; - let mut r: [u64; 8] = [0u64; 8]; - vst4q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_p8() { - let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; - let e: [u8; 64] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let mut r: [u8; 64] = [0u8; 64]; - vst4q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4_lane_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 6]; - let e: [u64; 4] = [1, 2, 2, 6]; - let mut r: [u64; 4] = [0u64; 4]; - vst4_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_p64() { - let a: [u64; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u64; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; - let mut r: [u64; 8] = [0u64; 8]; - vst4q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4_lane_f64() { - let a: [f64; 5] = [0., 1., 2., 2., 6.]; - let e: [f64; 4] = [1., 2., 2., 6.]; - let mut r: [f64; 4] = [0f64; 4]; - vst4_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vst4q_lane_f64() { - let a: [f64; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; - let e: [f64; 8] = [1., 2., 2., 6., 0., 0., 0., 0.]; - let mut r: [f64; 8] = [0f64; 8]; - vst4q_lane_f64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,i8mm")] - unsafe fn test_vusdot_laneq_s32() { - let a: i32x2 = i32x2::new(1000, -4200); - let b: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170); - let c: i8x16 = i8x16::new(4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11); - let e: i32x2 = i32x2::new(-3420, -10140); - let r: i32x2 = transmute(vusdot_laneq_s32::<3>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,i8mm")] - unsafe fn test_vusdotq_laneq_s32() { - let a: i32x4 = i32x4::new(1000, -4200, -1000, 2000); - let b: u8x16 = u8x16::new(100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250); - let c: i8x16 = i8x16::new(4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11); - let e: i32x4 = i32x4::new(-3420, -10140, -8460, -6980); - let r: i32x4 = transmute(vusdotq_laneq_s32::<3>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon,i8mm")] - unsafe fn test_vsudot_laneq_s32() { - let a: i32x2 = i32x2::new(-2000, 4200); - let b: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3); - let c: u8x16 = u8x16::new(100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250); - let e: i32x2 = i32x2::new(300, 2740); - let r: i32x2 = transmute(vsudot_laneq_s32::<3>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,i8mm")] - unsafe fn test_vsudotq_laneq_s32() { - let a: i32x4 = i32x4::new(-2000, 4200, -1000, 2000); - let b: i8x16 = i8x16::new(4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11); - let c: u8x16 = u8x16::new(100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250); - let e: i32x4 = i32x4::new(300, 2740, -6220, -6980); - let r: i32x4 = transmute(vsudotq_laneq_s32::<3>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_f64() { - let a: f64 = 1.0; - let b: f64 = 2.0; - let e: f64 = 2.0; - let r: f64 = transmute(vmul_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_f64() { - let a: f64x2 = f64x2::new(1.0, 2.0); - let b: f64x2 = f64x2::new(2.0, 3.0); - let e: f64x2 = f64x2::new(2.0, 6.0); - let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_n_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: f64 = 2.; - let r: f64 = transmute(vmul_n_f64(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_n_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64 = 2.; - let e: f64x2 = f64x2::new(2., 4.); - let r: f64x2 = transmute(vmulq_n_f64(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_lane_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: f64 = 2.; - let r: f64 = transmute(vmul_lane_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_laneq_f64() { - let a: f64 = 1.; - let b: f64x2 = f64x2::new(2., 0.); - let e: f64 = 2.; - let r: f64 = transmute(vmul_laneq_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_lane_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64 = 2.; - let e: f64x2 = f64x2::new(2., 4.); - let r: f64x2 = transmute(vmulq_lane_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_laneq_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64x2 = f64x2::new(2., 0.); - let e: f64x2 = f64x2::new(2., 4.); - let r: f64x2 = transmute(vmulq_laneq_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmuls_lane_f32() { - let a: f32 = 1.; - let b: f32x2 = f32x2::new(2., 0.); - let e: f32 = 2.; - let r: f32 = vmuls_lane_f32::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmuls_laneq_f32() { - let a: f32 = 1.; - let b: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32 = 2.; - let r: f32 = vmuls_laneq_f32::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmuld_lane_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: f64 = 2.; - let r: f64 = vmuld_lane_f64::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vmuld_laneq_f64() { - let a: f64 = 1.; - let b: f64x2 = f64x2::new(2., 0.); - let e: f64 = 2.; - let r: f64 = vmuld_laneq_f64::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_s8() { - let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); - let e: i16x8 = i16x8::new(9, 20, 11, 24, 13, 28, 15, 32); - let r: i16x8 = transmute(vmull_high_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_s16() { - let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); - let e: i32x4 = i32x4::new(9, 20, 11, 24); - let r: i32x4 = transmute(vmull_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_s32() { - let a: i32x4 = i32x4::new(1, 2, 9, 10); - let b: i32x4 = i32x4::new(1, 2, 1, 2); - let e: i64x2 = i64x2::new(9, 20); - let r: i64x2 = transmute(vmull_high_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_u8() { - let a: u8x16 = u8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); - let e: u16x8 = u16x8::new(9, 20, 11, 24, 13, 28, 15, 32); - let r: u16x8 = transmute(vmull_high_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_u16() { - let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2); - let e: u32x4 = u32x4::new(9, 20, 11, 24); - let r: u32x4 = transmute(vmull_high_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_u32() { - let a: u32x4 = u32x4::new(1, 2, 9, 10); - let b: u32x4 = u32x4::new(1, 2, 1, 2); - let e: u64x2 = u64x2::new(9, 20); - let r: u64x2 = transmute(vmull_high_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vmull_p64() { - let a: p64 = 15; - let b: p64 = 3; - let e: p128 = 17; - let r: p128 = vmull_p64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_p8() { - let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3); - let e: i16x8 = i16x8::new(9, 30, 11, 20, 13, 18, 15, 48); - let r: i16x8 = transmute(vmull_high_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vmull_high_p64() { - let a: i64x2 = i64x2::new(1, 15); - let b: i64x2 = i64x2::new(1, 3); - let e: p128 = 17; - let r: p128 = vmull_high_p64(transmute(a), transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_n_s16() { - let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: i16 = 2; - let e: i32x4 = i32x4::new(18, 20, 22, 24); - let r: i32x4 = transmute(vmull_high_n_s16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_n_s32() { - let a: i32x4 = i32x4::new(1, 2, 9, 10); - let b: i32 = 2; - let e: i64x2 = i64x2::new(18, 20); - let r: i64x2 = transmute(vmull_high_n_s32(transmute(a), b)); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_n_u16() { - let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: u16 = 2; - let e: u32x4 = u32x4::new(18, 20, 22, 24); - let r: u32x4 = transmute(vmull_high_n_u16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_n_u32() { - let a: u32x4 = u32x4::new(1, 2, 9, 10); - let b: u32 = 2; - let e: u64x2 = u64x2::new(18, 20); - let r: u64x2 = transmute(vmull_high_n_u32(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_lane_s16() { - let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i32x4 = i32x4::new(18, 20, 22, 24); - let r: i32x4 = transmute(vmull_high_lane_s16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_laneq_s16() { - let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(18, 20, 22, 24); - let r: i32x4 = transmute(vmull_high_laneq_s16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_lane_s32() { - let a: i32x4 = i32x4::new(1, 2, 9, 10); - let b: i32x2 = i32x2::new(0, 2); - let e: i64x2 = i64x2::new(18, 20); - let r: i64x2 = transmute(vmull_high_lane_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_laneq_s32() { - let a: i32x4 = i32x4::new(1, 2, 9, 10); - let b: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i64x2 = i64x2::new(18, 20); - let r: i64x2 = transmute(vmull_high_laneq_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_lane_u16() { - let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: u16x4 = u16x4::new(0, 2, 0, 0); - let e: u32x4 = u32x4::new(18, 20, 22, 24); - let r: u32x4 = transmute(vmull_high_lane_u16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_laneq_u16() { - let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12); - let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(18, 20, 22, 24); - let r: u32x4 = transmute(vmull_high_laneq_u16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_lane_u32() { - let a: u32x4 = u32x4::new(1, 2, 9, 10); - let b: u32x2 = u32x2::new(0, 2); - let e: u64x2 = u64x2::new(18, 20); - let r: u64x2 = transmute(vmull_high_lane_u32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmull_high_laneq_u32() { - let a: u32x4 = u32x4::new(1, 2, 9, 10); - let b: u32x4 = u32x4::new(0, 2, 0, 0); - let e: u64x2 = u64x2::new(18, 20); - let r: u64x2 = transmute(vmull_high_laneq_u32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulx_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x2 = f32x2::new(2., 2.); - let e: f32x2 = f32x2::new(2., 4.); - let r: f32x2 = transmute(vmulx_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxq_f32() { - let a: f32x4 = f32x4::new(1., 2., 3., 4.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let e: f32x4 = f32x4::new(2., 4., 6., 8.); - let r: 
f32x4 = transmute(vmulxq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulx_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: f64 = 2.; - let r: f64 = transmute(vmulx_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxq_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64x2 = f64x2::new(2., 2.); - let e: f64x2 = f64x2::new(2., 4.); - let r: f64x2 = transmute(vmulxq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulx_lane_f64() { - let a: f64 = 1.; - let b: f64 = 2.; - let e: f64 = 2.; - let r: f64 = transmute(vmulx_lane_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulx_laneq_f64() { - let a: f64 = 1.; - let b: f64x2 = f64x2::new(2., 0.); - let e: f64 = 2.; - let r: f64 = transmute(vmulx_laneq_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulx_lane_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x2 = f32x2::new(2., 0.); - let e: f32x2 = f32x2::new(2., 4.); - let r: f32x2 = transmute(vmulx_lane_f32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulx_laneq_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32x2 = f32x2::new(2., 4.); - let r: f32x2 = transmute(vmulx_laneq_f32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxq_lane_f32() { - let a: f32x4 = f32x4::new(1., 2., 3., 4.); - let b: f32x2 = f32x2::new(2., 0.); - let e: f32x4 = f32x4::new(2., 4., 6., 8.); - let r: f32x4 = transmute(vmulxq_lane_f32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxq_laneq_f32() { - let a: f32x4 = f32x4::new(1., 2., 3., 4.); - let b: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32x4 = f32x4::new(2., 4., 6., 8.); - let r: f32x4 = transmute(vmulxq_laneq_f32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxq_lane_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64 = 2.; - let e: f64x2 = f64x2::new(2., 4.); - let r: f64x2 = transmute(vmulxq_lane_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxq_laneq_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let b: f64x2 = f64x2::new(2., 0.); - let e: f64x2 = f64x2::new(2., 4.); - let r: f64x2 = transmute(vmulxq_laneq_f64::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxs_f32() { - let a: f32 = 2.; - let b: f32 = 3.; - let e: f32 = 6.; - let r: f32 = vmulxs_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxd_f64() { - let a: f64 = 2.; - let b: f64 = 3.; - let e: f64 = 6.; - let r: f64 = vmulxd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxs_lane_f32() { - let a: f32 = 2.; - let b: f32x2 = f32x2::new(3., 0.); - let e: f32 = 6.; - let r: f32 = vmulxs_lane_f32::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxs_laneq_f32() { - let a: f32 = 2.; - let b: f32x4 = f32x4::new(3., 0., 0., 0.); - let e: f32 = 6.; - let r: f32 = vmulxs_laneq_f32::<0>(a, 
transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxd_lane_f64() { - let a: f64 = 2.; - let b: f64 = 3.; - let e: f64 = 6.; - let r: f64 = vmulxd_lane_f64::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulxd_laneq_f64() { - let a: f64 = 2.; - let b: f64x2 = f64x2::new(3., 0.); - let e: f64 = 6.; - let r: f64 = vmulxd_laneq_f64::<0>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfma_f64() { - let a: f64 = 8.0; - let b: f64 = 6.0; - let c: f64 = 2.0; - let e: f64 = 20.0; - let r: f64 = transmute(vfma_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmaq_f64() { - let a: f64x2 = f64x2::new(8.0, 18.0); - let b: f64x2 = f64x2::new(6.0, 4.0); - let c: f64x2 = f64x2::new(2.0, 3.0); - let e: f64x2 = f64x2::new(20.0, 30.0); - let r: f64x2 = transmute(vfmaq_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfma_n_f64() { - let a: f64 = 2.0; - let b: f64 = 6.0; - let c: f64 = 8.0; - let e: f64 = 50.0; - let r: f64 = transmute(vfma_n_f64(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmaq_n_f64() { - let a: f64x2 = f64x2::new(2.0, 3.0); - let b: f64x2 = f64x2::new(6.0, 4.0); - let c: f64 = 8.0; - let e: f64x2 = f64x2::new(50.0, 35.0); - let r: f64x2 = transmute(vfmaq_n_f64(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfma_lane_f32() { - let a: f32x2 = f32x2::new(2., 3.); - let b: f32x2 = f32x2::new(6., 4.); - let c: f32x2 = f32x2::new(2., 0.); - let e: f32x2 = f32x2::new(14., 11.); - let r: f32x2 = transmute(vfma_lane_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfma_laneq_f32() { - let a: f32x2 = f32x2::new(2., 3.); - let b: f32x2 = f32x2::new(6., 4.); - let c: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32x2 = f32x2::new(14., 11.); - let r: f32x2 = transmute(vfma_laneq_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmaq_lane_f32() { - let a: f32x4 = f32x4::new(2., 3., 4., 5.); - let b: f32x4 = f32x4::new(6., 4., 7., 8.); - let c: f32x2 = f32x2::new(2., 0.); - let e: f32x4 = f32x4::new(14., 11., 18., 21.); - let r: f32x4 = transmute(vfmaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmaq_laneq_f32() { - let a: f32x4 = f32x4::new(2., 3., 4., 5.); - let b: f32x4 = f32x4::new(6., 4., 7., 8.); - let c: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32x4 = f32x4::new(14., 11., 18., 21.); - let r: f32x4 = transmute(vfmaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfma_lane_f64() { - let a: f64 = 2.; - let b: f64 = 6.; - let c: f64 = 2.; - let e: f64 = 14.; - let r: f64 = transmute(vfma_lane_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfma_laneq_f64() { - let a: f64 = 2.; - let b: f64 = 6.; - let c: f64x2 = f64x2::new(2., 0.); - let e: f64 = 14.; - let r: f64 = transmute(vfma_laneq_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vfmaq_lane_f64() { - let a: f64x2 = f64x2::new(2., 3.); - let b: f64x2 = f64x2::new(6., 4.); - let c: f64 = 2.; - let e: f64x2 = f64x2::new(14., 11.); - let r: f64x2 = transmute(vfmaq_lane_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmaq_laneq_f64() { - let a: f64x2 = f64x2::new(2., 3.); - let b: f64x2 = f64x2::new(6., 4.); - let c: f64x2 = f64x2::new(2., 0.); - let e: f64x2 = f64x2::new(14., 11.); - let r: f64x2 = transmute(vfmaq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmas_lane_f32() { - let a: f32 = 2.; - let b: f32 = 6.; - let c: f32x2 = f32x2::new(3., 0.); - let e: f32 = 20.; - let r: f32 = vfmas_lane_f32::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmas_laneq_f32() { - let a: f32 = 2.; - let b: f32 = 6.; - let c: f32x4 = f32x4::new(3., 0., 0., 0.); - let e: f32 = 20.; - let r: f32 = vfmas_laneq_f32::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmad_lane_f64() { - let a: f64 = 2.; - let b: f64 = 6.; - let c: f64 = 3.; - let e: f64 = 20.; - let r: f64 = vfmad_lane_f64::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmad_laneq_f64() { - let a: f64 = 2.; - let b: f64 = 6.; - let c: f64x2 = f64x2::new(3., 0.); - let e: f64 = 20.; - let r: f64 = vfmad_laneq_f64::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfms_f64() { - let a: f64 = 20.0; - let b: f64 = 6.0; - let c: f64 = 2.0; - let e: f64 = 8.0; - let r: f64 = transmute(vfms_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsq_f64() { - let a: f64x2 = f64x2::new(20.0, 30.0); - let b: f64x2 = f64x2::new(6.0, 4.0); - let c: f64x2 = f64x2::new(2.0, 3.0); - let e: f64x2 = f64x2::new(8.0, 18.0); - let r: f64x2 = transmute(vfmsq_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfms_n_f64() { - let a: f64 = 50.0; - let b: f64 = 6.0; - let c: f64 = 8.0; - let e: f64 = 2.0; - let r: f64 = transmute(vfms_n_f64(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsq_n_f64() { - let a: f64x2 = f64x2::new(50.0, 35.0); - let b: f64x2 = f64x2::new(6.0, 4.0); - let c: f64 = 8.0; - let e: f64x2 = f64x2::new(2.0, 3.0); - let r: f64x2 = transmute(vfmsq_n_f64(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfms_lane_f32() { - let a: f32x2 = f32x2::new(14., 11.); - let b: f32x2 = f32x2::new(6., 4.); - let c: f32x2 = f32x2::new(2., 0.); - let e: f32x2 = f32x2::new(2., 3.); - let r: f32x2 = transmute(vfms_lane_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfms_laneq_f32() { - let a: f32x2 = f32x2::new(14., 11.); - let b: f32x2 = f32x2::new(6., 4.); - let c: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32x2 = f32x2::new(2., 3.); - let r: f32x2 = transmute(vfms_laneq_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsq_lane_f32() { - let a: f32x4 = f32x4::new(14., 11., 18., 21.); - let b: f32x4 = 
f32x4::new(6., 4., 7., 8.); - let c: f32x2 = f32x2::new(2., 0.); - let e: f32x4 = f32x4::new(2., 3., 4., 5.); - let r: f32x4 = transmute(vfmsq_lane_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsq_laneq_f32() { - let a: f32x4 = f32x4::new(14., 11., 18., 21.); - let b: f32x4 = f32x4::new(6., 4., 7., 8.); - let c: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32x4 = f32x4::new(2., 3., 4., 5.); - let r: f32x4 = transmute(vfmsq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfms_lane_f64() { - let a: f64 = 14.; - let b: f64 = 6.; - let c: f64 = 2.; - let e: f64 = 2.; - let r: f64 = transmute(vfms_lane_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfms_laneq_f64() { - let a: f64 = 14.; - let b: f64 = 6.; - let c: f64x2 = f64x2::new(2., 0.); - let e: f64 = 2.; - let r: f64 = transmute(vfms_laneq_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsq_lane_f64() { - let a: f64x2 = f64x2::new(14., 11.); - let b: f64x2 = f64x2::new(6., 4.); - let c: f64 = 2.; - let e: f64x2 = f64x2::new(2., 3.); - let r: f64x2 = transmute(vfmsq_lane_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsq_laneq_f64() { - let a: f64x2 = f64x2::new(14., 11.); - let b: f64x2 = f64x2::new(6., 4.); - let c: f64x2 = f64x2::new(2., 0.); - let e: f64x2 = f64x2::new(2., 3.); - let r: f64x2 = transmute(vfmsq_laneq_f64::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmss_lane_f32() { - let a: f32 = 14.; - let b: f32 = 6.; - let c: f32x2 = f32x2::new(2., 0.); - let e: f32 = 2.; - let r: f32 = vfmss_lane_f32::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmss_laneq_f32() { - let a: f32 = 14.; - let b: f32 = 6.; - let c: f32x4 = f32x4::new(2., 0., 0., 0.); - let e: f32 = 2.; - let r: f32 = vfmss_laneq_f32::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsd_lane_f64() { - let a: f64 = 14.; - let b: f64 = 6.; - let c: f64 = 2.; - let e: f64 = 2.; - let r: f64 = vfmsd_lane_f64::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vfmsd_laneq_f64() { - let a: f64 = 14.; - let b: f64 = 6.; - let c: f64x2 = f64x2::new(2., 0.); - let e: f64 = 2.; - let r: f64 = vfmsd_laneq_f64::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdiv_f32() { - let a: f32x2 = f32x2::new(2.0, 6.0); - let b: f32x2 = f32x2::new(1.0, 2.0); - let e: f32x2 = f32x2::new(2.0, 3.0); - let r: f32x2 = transmute(vdiv_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdivq_f32() { - let a: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0); - let b: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0); - let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0); - let r: f32x4 = transmute(vdivq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdiv_f64() { - let a: f64 = 2.0; - let b: f64 = 1.0; - let e: f64 = 2.0; - let r: f64 = transmute(vdiv_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vdivq_f64() { - let a: f64x2 = f64x2::new(2.0, 6.0); - let b: f64x2 = f64x2::new(1.0, 2.0); - let e: f64x2 = f64x2::new(2.0, 3.0); - let r: f64x2 = transmute(vdivq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsub_f64() { - let a: f64 = 1.0; - let b: f64 = 1.0; - let e: f64 = 0.0; - let r: f64 = transmute(vsub_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubq_f64() { - let a: f64x2 = f64x2::new(1.0, 4.0); - let b: f64x2 = f64x2::new(1.0, 2.0); - let e: f64x2 = f64x2::new(0.0, 2.0); - let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubd_s64() { - let a: i64 = 3; - let b: i64 = 2; - let e: i64 = 1; - let r: i64 = vsubd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubd_u64() { - let a: u64 = 3; - let b: u64 = 2; - let e: u64 = 1; - let r: u64 = vsubd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddd_s64() { - let a: i64 = 1; - let b: i64 = 2; - let e: i64 = 3; - let r: i64 = vaddd_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddd_u64() { - let a: u64 = 1; - let b: u64 = 2; - let e: u64 = 3; - let r: u64 = vaddd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddv_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let e: f32 = 3.; - let r: f32 = vaddv_f32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddvq_f32() { - let a: f32x4 = f32x4::new(1., 2., 0., 0.); - let e: f32 = 3.; - let r: f32 = vaddvq_f32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddvq_f64() { - let a: f64x2 = f64x2::new(1., 2.); - let e: f64 = 3.; - let r: f64 = vaddvq_f64(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlv_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i32 = 10; - let r: i32 = vaddlv_s16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlvq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i32 = 36; - let r: i32 = vaddlvq_s16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlv_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: i64 = 3; - let r: i64 = vaddlv_s32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlvq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i64 = 10; - let r: i64 = vaddlvq_s32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlv_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u32 = 10; - let r: u32 = vaddlv_u16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlvq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u32 = 36; - let r: u32 = vaddlvq_u16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlv_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: u64 = 3; - let r: u64 = vaddlv_u32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddlvq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u64 = 10; - let r: u64 = vaddlvq_u32(transmute(a)); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_high_s8() { - let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_high_s16() { - let a: i32x4 = i32x4::new(8, 9, 10, 11); - let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11); - let e: i32x4 = i32x4::new(0, 0, 0, 0); - let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_high_s32() { - let a: i64x2 = i64x2::new(8, 9); - let b: i32x4 = i32x4::new(6, 7, 8, 9); - let e: i64x2 = i64x2::new(0, 0); - let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_high_u8() { - let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_high_u16() { - let a: u32x4 = u32x4::new(8, 9, 10, 11); - let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11); - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_high_u32() { - let a: u64x2 = u64x2::new(8, 9); - let b: u32x4 = u32x4::new(6, 7, 8, 9); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_high_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_high_s16() { - let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8); - let e: i32x4 = i32x4::new(4, 5, 6, 7); - let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_high_s32() { - let a: i32x4 = i32x4::new(12, 13, 14, 15); - let b: i32x4 = i32x4::new(6, 6, 8, 8); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_high_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_high_u16() { - let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8); - let e: u32x4 = u32x4::new(4, 5, 6, 7); - let r: u32x4 = transmute(vsubl_high_u16(transmute(a), 
transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_high_u32() { - let a: u32x4 = u32x4::new(12, 13, 14, 15); - let b: u32x4 = u32x4::new(6, 6, 8, 8); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_s8() { - let a: i8x16 = i8x16::new(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0); - let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let c: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let e: i8x16 = i8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - let r: i8x16 = transmute(vbcaxq_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_s16() { - let a: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0); - let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let c: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let e: i16x8 = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6); - let r: i16x8 = transmute(vbcaxq_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_s32() { - let a: i32x4 = i32x4::new(1, 0, 1, 0); - let b: i32x4 = i32x4::new(0, 1, 2, 3); - let c: i32x4 = i32x4::new(1, 1, 1, 1); - let e: i32x4 = i32x4::new(1, 0, 3, 2); - let r: i32x4 = transmute(vbcaxq_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_s64() { - let a: i64x2 = i64x2::new(1, 0); - let b: i64x2 = i64x2::new(0, 1); - let c: i64x2 = i64x2::new(1, 1); - let e: i64x2 = i64x2::new(1, 0); - let r: i64x2 = transmute(vbcaxq_s64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_u8() { - let a: u8x16 = u8x16::new(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0); - let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let c: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let e: u8x16 = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - let r: u8x16 = transmute(vbcaxq_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_u16() { - let a: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0); - let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let c: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let e: u16x8 = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6); - let r: u16x8 = transmute(vbcaxq_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_u32() { - let a: u32x4 = u32x4::new(1, 0, 1, 0); - let b: u32x4 = u32x4::new(0, 1, 2, 3); - let c: u32x4 = u32x4::new(1, 1, 1, 1); - let e: u32x4 = u32x4::new(1, 0, 3, 2); - let r: u32x4 = transmute(vbcaxq_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vbcaxq_u64() { - let a: u64x2 = u64x2::new(1, 0); - let b: u64x2 = u64x2::new(0, 1); - let c: u64x2 = u64x2::new(1, 1); - let e: u64x2 = u64x2::new(1, 0); - let r: u64x2 = transmute(vbcaxq_u64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcadd_rot270_f32() { - let a: f32x2 = f32x2::new(1., -1.); - let b: f32x2 = 
f32x2::new(-1., 1.); - let e: f32x2 = f32x2::new(2., 0.); - let r: f32x2 = transmute(vcadd_rot270_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcaddq_rot270_f32() { - let a: f32x4 = f32x4::new(1., -1., 1., -1.); - let b: f32x4 = f32x4::new(-1., 1., -1., 1.); - let e: f32x4 = f32x4::new(2., 0., 2., 0.); - let r: f32x4 = transmute(vcaddq_rot270_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcaddq_rot270_f64() { - let a: f64x2 = f64x2::new(1., -1.); - let b: f64x2 = f64x2::new(-1., 1.); - let e: f64x2 = f64x2::new(2., 0.); - let r: f64x2 = transmute(vcaddq_rot270_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcadd_rot90_f32() { - let a: f32x2 = f32x2::new(1., -1.); - let b: f32x2 = f32x2::new(-1., 1.); - let e: f32x2 = f32x2::new(0., -2.); - let r: f32x2 = transmute(vcadd_rot90_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcaddq_rot90_f32() { - let a: f32x4 = f32x4::new(1., -1., 1., -1.); - let b: f32x4 = f32x4::new(-1., 1., -1., 1.); - let e: f32x4 = f32x4::new(0., -2., 0., -2.); - let r: f32x4 = transmute(vcaddq_rot90_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcaddq_rot90_f64() { - let a: f64x2 = f64x2::new(1., -1.); - let b: f64x2 = f64x2::new(-1., 1.); - let e: f64x2 = f64x2::new(0., -2.); - let r: f64x2 = transmute(vcaddq_rot90_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmla_f32() { - let a: f32x2 = f32x2::new(1., -1.); - let b: f32x2 = f32x2::new(-1., 1.); - let c: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(0., -2.); - let r: f32x2 = transmute(vcmla_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_f32() { - let a: f32x4 = f32x4::new(1., -1., 1., -1.); - let b: f32x4 = f32x4::new(-1., 1., -1., 1.); - let c: f32x4 = f32x4::new(1., 1., -1., -1.); - let e: f32x4 = f32x4::new(0., -2., 2., 0.); - let r: f32x4 = transmute(vcmlaq_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_f64() { - let a: f64x2 = f64x2::new(1., -1.); - let b: f64x2 = f64x2::new(-1., 1.); - let c: f64x2 = f64x2::new(1., 1.); - let e: f64x2 = f64x2::new(0., -2.); - let r: f64x2 = transmute(vcmlaq_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmla_rot90_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let b: f32x2 = f32x2::new(1., -1.); - let c: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(2., 0.); - let r: f32x2 = transmute(vcmla_rot90_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_rot90_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 1.); - let b: f32x4 = f32x4::new(1., -1., 1., -1.); - let c: f32x4 = f32x4::new(1., 1., 1., 1.); - let e: f32x4 = f32x4::new(2., 0., 2., 0.); - let r: f32x4 = transmute(vcmlaq_rot90_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_rot90_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let b: f64x2 = f64x2::new(1., -1.); - let c: f64x2 = 
f64x2::new(1., 1.); - let e: f64x2 = f64x2::new(2., 0.); - let r: f64x2 = transmute(vcmlaq_rot90_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmla_rot180_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let b: f32x2 = f32x2::new(1., -1.); - let c: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vcmla_rot180_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_rot180_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 1.); - let b: f32x4 = f32x4::new(1., -1., 1., -1.); - let c: f32x4 = f32x4::new(1., 1., 1., 1.); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vcmlaq_rot180_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_rot180_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let b: f64x2 = f64x2::new(1., -1.); - let c: f64x2 = f64x2::new(1., 1.); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vcmlaq_rot180_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmla_rot270_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let b: f32x2 = f32x2::new(1., -1.); - let c: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(0., 2.); - let r: f32x2 = transmute(vcmla_rot270_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_rot270_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 1.); - let b: f32x4 = f32x4::new(1., -1., 1., -1.); - let c: f32x4 = f32x4::new(1., 1., 1., 1.); - let e: f32x4 = f32x4::new(0., 2., 0., 2.); - let r: f32x4 = transmute(vcmlaq_rot270_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_rot270_f64() { - let a: f64x2 = f64x2::new(1., 1.); - let b: f64x2 = f64x2::new(1., -1.); - let c: f64x2 = f64x2::new(1., 1.); - let e: f64x2 = f64x2::new(0., 2.); - let r: f64x2 = transmute(vcmlaq_rot270_f64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmla_lane_f32() { - let a: f32x2 = f32x2::new(1., -1.); - let b: f32x2 = f32x2::new(-1., 1.); - let c: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(0., -2.); - let r: f32x2 = transmute(vcmla_lane_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmla_laneq_f32() { - let a: f32x2 = f32x2::new(1., -1.); - let b: f32x2 = f32x2::new(-1., 1.); - let c: f32x4 = f32x4::new(1., 1., -1., -1.); - let e: f32x2 = f32x2::new(0., -2.); - let r: f32x2 = transmute(vcmla_laneq_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_lane_f32() { - let a: f32x4 = f32x4::new(1., -1., 1., -1.); - let b: f32x4 = f32x4::new(-1., 1., -1., 1.); - let c: f32x2 = f32x2::new(1., 1.); - let e: f32x4 = f32x4::new(0., -2., 0., -2.); - let r: f32x4 = transmute(vcmlaq_lane_f32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,fcma")] - unsafe fn test_vcmlaq_laneq_f32() { - let a: f32x4 = f32x4::new(1., -1., 1., -1.); - let b: f32x4 = f32x4::new(-1., 1., -1., 1.); - let c: f32x4 = f32x4::new(1., 1., -1., -1.); - 
-        let e: f32x4 = f32x4::new(0., -2., 0., -2.);
-        let r: f32x4 = transmute(vcmlaq_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmla_rot90_lane_f32() {
-        let a: f32x2 = f32x2::new(1., -1.);
-        let b: f32x2 = f32x2::new(-1., 1.);
-        let c: f32x2 = f32x2::new(1., 1.);
-        let e: f32x2 = f32x2::new(0., 0.);
-        let r: f32x2 = transmute(vcmla_rot90_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmla_rot90_laneq_f32() {
-        let a: f32x2 = f32x2::new(1., -1.);
-        let b: f32x2 = f32x2::new(-1., 1.);
-        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
-        let e: f32x2 = f32x2::new(0., 0.);
-        let r: f32x2 = transmute(vcmla_rot90_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmlaq_rot90_lane_f32() {
-        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
-        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
-        let c: f32x2 = f32x2::new(1., 1.);
-        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
-        let r: f32x4 = transmute(vcmlaq_rot90_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmlaq_rot90_laneq_f32() {
-        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
-        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
-        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
-        let e: f32x4 = f32x4::new(0., 0., 0., 0.);
-        let r: f32x4 = transmute(vcmlaq_rot90_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmla_rot180_lane_f32() {
-        let a: f32x2 = f32x2::new(1., -1.);
-        let b: f32x2 = f32x2::new(-1., 1.);
-        let c: f32x2 = f32x2::new(1., 1.);
-        let e: f32x2 = f32x2::new(2., 0.);
-        let r: f32x2 = transmute(vcmla_rot180_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmla_rot180_laneq_f32() {
-        let a: f32x2 = f32x2::new(1., -1.);
-        let b: f32x2 = f32x2::new(-1., 1.);
-        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
-        let e: f32x2 = f32x2::new(2., 0.);
-        let r: f32x2 = transmute(vcmla_rot180_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmlaq_rot180_lane_f32() {
-        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
-        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
-        let c: f32x2 = f32x2::new(1., 1.);
-        let e: f32x4 = f32x4::new(2., 0., 2., 0.);
-        let r: f32x4 = transmute(vcmlaq_rot180_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmlaq_rot180_laneq_f32() {
-        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
-        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
-        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
-        let e: f32x4 = f32x4::new(2., 0., 2., 0.);
-        let r: f32x4 = transmute(vcmlaq_rot180_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmla_rot270_lane_f32() {
-        let a: f32x2 = f32x2::new(1., -1.);
-        let b: f32x2 = f32x2::new(-1., 1.);
-        let c: f32x2 = f32x2::new(1., 1.);
-        let e: f32x2 = f32x2::new(2., -2.);
-        let r: f32x2 = transmute(vcmla_rot270_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmla_rot270_laneq_f32() {
-        let a: f32x2 = f32x2::new(1., -1.);
-        let b: f32x2 = f32x2::new(-1., 1.);
-        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
-        let e: f32x2 = f32x2::new(2., -2.);
-        let r: f32x2 = transmute(vcmla_rot270_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmlaq_rot270_lane_f32() {
-        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
-        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
-        let c: f32x2 = f32x2::new(1., 1.);
-        let e: f32x4 = f32x4::new(2., -2., 2., -2.);
-        let r: f32x4 = transmute(vcmlaq_rot270_lane_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,fcma")]
-    unsafe fn test_vcmlaq_rot270_laneq_f32() {
-        let a: f32x4 = f32x4::new(1., -1., 1., -1.);
-        let b: f32x4 = f32x4::new(-1., 1., -1., 1.);
-        let c: f32x4 = f32x4::new(1., 1., -1., -1.);
-        let e: f32x4 = f32x4::new(2., -2., 2., -2.);
-        let r: f32x4 = transmute(vcmlaq_rot270_laneq_f32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,dotprod")]
-    unsafe fn test_vdot_laneq_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i8x8 = i8x8::new(-1, 2, 3, 4, 5, 6, 7, 8);
-        let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        let e: i32x2 = i32x2::new(29, 72);
-        let r: i32x2 = transmute(vdot_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,dotprod")]
-    unsafe fn test_vdotq_laneq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 1, 2);
-        let b: i8x16 = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        let e: i32x4 = i32x4::new(29, 72, 31, 72);
-        let r: i32x4 = transmute(vdotq_laneq_s32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,dotprod")]
-    unsafe fn test_vdot_laneq_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u8x8 = u8x8::new(255, 2, 3, 4, 5, 6, 7, 8);
-        let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u32x2 = u32x2::new(285, 72);
-        let r: u32x2 = transmute(vdot_laneq_u32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,dotprod")]
-    unsafe fn test_vdotq_laneq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 1, 2);
-        let b: u8x16 = u8x16::new(255, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u32x4 = u32x4::new(285, 72, 31, 72);
-        let r: u32x4 = transmute(vdotq_laneq_u32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmax_f64() {
-        let a: f64 = 1.0;
-        let b: f64 = 0.0;
-        let e: f64 = 1.0;
-        let r: f64 = transmute(vmax_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxq_f64() {
-        let a: f64x2 = f64x2::new(1.0, -2.0);
-        let b: f64x2 = f64x2::new(0.0, 3.0);
-        let e: f64x2 = f64x2::new(1.0, 3.0);
-        let r: f64x2 = transmute(vmaxq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxnm_f64() {
-        let a: f64 = 1.0;
-        let b: f64 = 8.0;
-        let e: f64 = 8.0;
-        let r: f64 = transmute(vmaxnm_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxnmq_f64() {
-        let a: f64x2 = f64x2::new(1.0, 2.0);
-        let b: f64x2 = f64x2::new(8.0, 16.0);
-        let e: f64x2 = f64x2::new(8.0, 16.0);
-        let r: f64x2 = transmute(vmaxnmq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxnmv_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let e: f32 = 2.;
-        let r: f32 = vmaxnmv_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxnmvq_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let e: f64 = 2.;
-        let r: f64 = vmaxnmvq_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxnmvq_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 0., 1.);
-        let e: f32 = 2.;
-        let r: f32 = vmaxnmvq_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxnm_f32() {
-        let a: f32x2 = f32x2::new(1.0, 2.0);
-        let b: f32x2 = f32x2::new(6.0, -3.0);
-        let e: f32x2 = f32x2::new(2.0, 6.0);
-        let r: f32x2 = transmute(vpmaxnm_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxnmq_f64() {
-        let a: f64x2 = f64x2::new(1.0, 2.0);
-        let b: f64x2 = f64x2::new(6.0, -3.0);
-        let e: f64x2 = f64x2::new(2.0, 6.0);
-        let r: f64x2 = transmute(vpmaxnmq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxnmq_f32() {
-        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
-        let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
-        let e: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0);
-        let r: f32x4 = transmute(vpmaxnmq_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxnms_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let e: f32 = 2.;
-        let r: f32 = vpmaxnms_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxnmqd_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let e: f64 = 2.;
-        let r: f64 = vpmaxnmqd_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxs_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let e: f32 = 2.;
-        let r: f32 = vpmaxs_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxqd_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let e: f64 = 2.;
-        let r: f64 = vpmaxqd_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmin_f64() {
-        let a: f64 = 1.0;
-        let b: f64 = 0.0;
-        let e: f64 = 0.0;
-        let r: f64 = transmute(vmin_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminq_f64() {
-        let a: f64x2 = f64x2::new(1.0, -2.0);
-        let b: f64x2 = f64x2::new(0.0, 3.0);
-        let e: f64x2 = f64x2::new(0.0, -2.0);
-        let r: f64x2 = transmute(vminq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminnm_f64() {
-        let a: f64 = 1.0;
-        let b: f64 = 8.0;
-        let e: f64 = 1.0;
-        let r: f64 = transmute(vminnm_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminnmq_f64() {
-        let a: f64x2 = f64x2::new(1.0, 2.0);
-        let b: f64x2 = f64x2::new(8.0, 16.0);
-        let e: f64x2 = f64x2::new(1.0, 2.0);
-        let r: f64x2 = transmute(vminnmq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminnmv_f32() {
-        let a: f32x2 = f32x2::new(1., 0.);
-        let e: f32 = 0.;
-        let r: f32 = vminnmv_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminnmvq_f64() {
-        let a: f64x2 = f64x2::new(1., 0.);
-        let e: f64 = 0.;
-        let r: f64 = vminnmvq_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminnmvq_f32() {
-        let a: f32x4 = f32x4::new(1., 0., 2., 3.);
-        let e: f32 = 0.;
-        let r: f32 = vminnmvq_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_high_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10);
-        let e: i16x8 = i16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
-        let r: i16x8 = transmute(vmovl_high_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_high_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 3, 4, 5, 6);
-        let e: i32x4 = i32x4::new(3, 4, 5, 6);
-        let r: i32x4 = transmute(vmovl_high_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_high_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let e: i64x2 = i64x2::new(3, 4);
-        let r: i64x2 = transmute(vmovl_high_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_high_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 3, 4, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9, 10);
-        let e: u16x8 = u16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
-        let r: u16x8 = transmute(vmovl_high_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_high_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 3, 4, 5, 6);
-        let e: u32x4 = u32x4::new(3, 4, 5, 6);
-        let r: u32x4 = transmute(vmovl_high_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_high_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u64x2 = u64x2::new(3, 4);
-        let r: u64x2 = transmute(vmovl_high_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x4 = f32x4::new(3., 4., 5., 6.);
-        let e: f32x4 = f32x4::new(3., 7., 7., 11.);
-        let r: f32x4 = transmute(vpaddq_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let b: f64x2 = f64x2::new(3., 4.);
-        let e: f64x2 = f64x2::new(3., 7.);
-        let r: f64x2 = transmute(vpaddq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadds_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let e: f32 = 3.;
-        let r: f32 = vpadds_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddd_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let e: f64 = 3.;
-        let r: f64 = vpaddd_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminnm_f32() {
-        let a: f32x2 = f32x2::new(1.0, 2.0);
-        let b: f32x2 = f32x2::new(6.0, -3.0);
-        let e: f32x2 = f32x2::new(1.0, -3.0);
-        let r: f32x2 = transmute(vpminnm_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminnmq_f64() {
-        let a: f64x2 = f64x2::new(1.0, 2.0);
-        let b: f64x2 = f64x2::new(6.0, -3.0);
-        let e: f64x2 = f64x2::new(1.0, -3.0);
-        let r: f64x2 = transmute(vpminnmq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminnmq_f32() {
-        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
-        let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
-        let e: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0);
-        let r: f32x4 = transmute(vpminnmq_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminnms_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let e: f32 = 1.;
-        let r: f32 = vpminnms_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminnmqd_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let e: f64 = 1.;
-        let r: f64 = vpminnmqd_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmins_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let e: f32 = 1.;
-        let r: f32 = vpmins_f32(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminqd_f64() {
-        let a: f64x2 = f64x2::new(1., 2.);
-        let e: f64 = 1.;
-        let r: f64 = vpminqd_f64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmullh_s16() {
-        let a: i16 = 2;
-        let b: i16 = 3;
-        let e: i32 = 12;
-        let r: i32 = vqdmullh_s16(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmulls_s32() {
-        let a: i32 = 2;
-        let b: i32 = 3;
-        let e: i64 = 12;
-        let r: i64 = vqdmulls_s32(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_s16() {
-        let a: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let b: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
-        let e: i32x4 = i32x4::new(40, 60, 84, 112);
-        let r: i32x4 = transmute(vqdmull_high_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 4, 5);
-        let b: i32x4 = i32x4::new(1, 2, 5, 6);
-        let e: i64x2 = i64x2::new(40, 60);
-        let r: i64x2 = transmute(vqdmull_high_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_n_s16() {
-        let a: i16x8 = i16x8::new(0, 2, 8, 10, 8, 10, 12, 14);
-        let b: i16 = 2;
-        let e: i32x4 = i32x4::new(32, 40, 48, 56);
-        let r: i32x4 = transmute(vqdmull_high_n_s16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_n_s32() {
-        let a: i32x4 = i32x4::new(0, 2, 8, 10);
-        let b: i32 = 2;
-        let e: i64x2 = i64x2::new(32, 40);
-        let r: i64x2 = transmute(vqdmull_high_n_s32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_laneq_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
-        let e: i32x4 = i32x4::new(4, 8, 12, 16);
-        let r: i32x4 = transmute(vqdmull_laneq_s16::<4>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_laneq_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 2, 2, 0);
-        let e: i64x2 = i64x2::new(4, 8);
-        let r: i64x2 = transmute(vqdmull_laneq_s32::<2>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmullh_lane_s16() {
-        let a: i16 = 2;
-        let b: i16x4 = i16x4::new(0, 2, 2, 0);
-        let e: i32 = 8;
-        let r: i32 = vqdmullh_lane_s16::<2>(a, transmute(b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmullh_laneq_s16() {
-        let a: i16 = 2;
-        let b: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
-        let e: i32 = 8;
-        let r: i32 = vqdmullh_laneq_s16::<4>(a, transmute(b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmulls_lane_s32() {
-        let a: i32 = 2;
-        let b: i32x2 = i32x2::new(0, 2);
-        let e: i64 = 8;
-        let r: i64 = vqdmulls_lane_s32::<1>(a, transmute(b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmulls_laneq_s32() {
-        let a: i32 = 2;
-        let b: i32x4 = i32x4::new(0, 2, 2, 0);
-        let e: i64 = 8;
-        let r: i64 = vqdmulls_laneq_s32::<2>(a, transmute(b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_lane_s16() {
-        let a: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let b: i16x4 = i16x4::new(0, 2, 2, 0);
-        let e: i32x4 = i32x4::new(16, 20, 24, 28);
-        let r: i32x4 = transmute(vqdmull_high_lane_s16::<2>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_lane_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 4, 5);
-        let b: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(16, 20);
-        let r: i64x2 = transmute(vqdmull_high_lane_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_laneq_s16() {
-        let a: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let b: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
-        let e: i32x4 = i32x4::new(16, 20, 24, 28);
-        let r: i32x4 = transmute(vqdmull_high_laneq_s16::<4>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmull_high_laneq_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 4, 5);
-        let b: i32x4 = i32x4::new(0, 2, 2, 0);
-        let e: i64x2 = i64x2::new(16, 20);
-        let r: i64x2 = transmute(vqdmull_high_laneq_s32::<2>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_s16() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let c: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
-        let e: i32x4 = i32x4::new(41, 62, 87, 116);
-        let r: i32x4 = transmute(vqdmlal_high_s16(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_s32() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 1, 4, 5);
-        let c: i32x4 = i32x4::new(1, 2, 5, 6);
-        let e: i64x2 = i64x2::new(41, 62);
-        let r: i64x2 = transmute(vqdmlal_high_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_n_s16() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 2, 8, 10, 8, 10, 12, 14);
-        let c: i16 = 2;
-        let e: i32x4 = i32x4::new(33, 42, 51, 60);
-        let r: i32x4 = transmute(vqdmlal_high_n_s16(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_n_s32() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 2, 8, 10);
-        let c: i32 = 2;
-        let e: i64x2 = i64x2::new(33, 42);
-        let r: i64x2 = transmute(vqdmlal_high_n_s32(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_laneq_s16() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(1, 2, 3, 4);
-        let c: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
-        let e: i32x4 = i32x4::new(5, 10, 15, 20);
-        let r: i32x4 = transmute(vqdmlal_laneq_s16::<2>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_laneq_s32() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i32x2 = i32x2::new(1, 2);
-        let c: i32x4 = i32x4::new(0, 2, 2, 0);
-        let e: i64x2 = i64x2::new(5, 10);
-        let r: i64x2 = transmute(vqdmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_lane_s16() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let c: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(17, 22, 27, 32);
-        let r: i32x4 = transmute(vqdmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_laneq_s16() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i32x4 = i32x4::new(17, 22, 27, 32);
-        let r: i32x4 = transmute(vqdmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_lane_s32() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 1, 4, 5);
-        let c: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(17, 22);
-        let r: i64x2 = transmute(vqdmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlal_high_laneq_s32() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 1, 4, 5);
-        let c: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i64x2 = i64x2::new(17, 22);
-        let r: i64x2 = transmute(vqdmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlalh_s16() {
-        let a: i32 = 1;
-        let b: i16 = 1;
-        let c: i16 = 2;
-        let e: i32 = 5;
-        let r: i32 = vqdmlalh_s16(a, b, c);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlals_s32() {
-        let a: i64 = 1;
-        let b: i32 = 1;
-        let c: i32 = 2;
-        let e: i64 = 5;
-        let r: i64 = vqdmlals_s32(a, b, c);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlalh_lane_s16() {
-        let a: i32 = 1;
-        let b: i16 = 1;
-        let c: i16x4 = i16x4::new(2, 1, 1, 1);
-        let e: i32 = 5;
-        let r: i32 = vqdmlalh_lane_s16::<0>(a, b, transmute(c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlalh_laneq_s16() {
-        let a: i32 = 1;
-        let b: i16 = 1;
-        let c: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
-        let e: i32 = 5;
-        let r: i32 = vqdmlalh_laneq_s16::<0>(a, b, transmute(c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlals_lane_s32() {
-        let a: i64 = 1;
-        let b: i32 = 1;
-        let c: i32x2 = i32x2::new(2, 1);
-        let e: i64 = 5;
-        let r: i64 = vqdmlals_lane_s32::<0>(a, b, transmute(c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlals_laneq_s32() {
-        let a: i64 = 1;
-        let b: i32 = 1;
-        let c: i32x4 = i32x4::new(2, 1, 1, 1);
-        let e: i64 = 5;
-        let r: i64 = vqdmlals_laneq_s32::<0>(a, b, transmute(c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_s16() {
-        let a: i32x4 = i32x4::new(39, 58, 81, 108);
-        let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let c: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8);
-        let e: i32x4 = i32x4::new(-1, -2, -3, -4);
-        let r: i32x4 = transmute(vqdmlsl_high_s16(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_s32() {
-        let a: i64x2 = i64x2::new(39, 58);
-        let b: i32x4 = i32x4::new(0, 1, 4, 5);
-        let c: i32x4 = i32x4::new(1, 2, 5, 6);
-        let e: i64x2 = i64x2::new(-1, -2);
-        let r: i64x2 = transmute(vqdmlsl_high_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_n_s16() {
-        let a: i32x4 = i32x4::new(31, 38, 45, 52);
-        let b: i16x8 = i16x8::new(0, 2, 8, 10, 8, 10, 12, 14);
-        let c: i16 = 2;
-        let e: i32x4 = i32x4::new(-1, -2, -3, -4);
-        let r: i32x4 = transmute(vqdmlsl_high_n_s16(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_n_s32() {
-        let a: i64x2 = i64x2::new(31, 38);
-        let b: i32x4 = i32x4::new(0, 2, 8, 10);
-        let c: i32 = 2;
-        let e: i64x2 = i64x2::new(-1, -2);
-        let r: i64x2 = transmute(vqdmlsl_high_n_s32(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_laneq_s16() {
-        let a: i32x4 = i32x4::new(3, 6, 9, 12);
-        let b: i16x4 = i16x4::new(1, 2, 3, 4);
-        let c: i16x8 = i16x8::new(0, 2, 2, 0, 2, 0, 0, 0);
-        let e: i32x4 = i32x4::new(-1, -2, -3, -4);
-        let r: i32x4 = transmute(vqdmlsl_laneq_s16::<2>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_laneq_s32() {
-        let a: i64x2 = i64x2::new(3, 6);
-        let b: i32x2 = i32x2::new(1, 2);
-        let c: i32x4 = i32x4::new(0, 2, 2, 0);
-        let e: i64x2 = i64x2::new(-1, -2);
-        let r: i64x2 = transmute(vqdmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_lane_s16() {
-        let a: i32x4 = i32x4::new(15, 18, 21, 24);
-        let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let c: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(-1, -2, -3, -4);
-        let r: i32x4 = transmute(vqdmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_laneq_s16() {
-        let a: i32x4 = i32x4::new(15, 18, 21, 24);
-        let b: i16x8 = i16x8::new(0, 1, 4, 5, 4, 5, 6, 7);
-        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i32x4 = i32x4::new(-1, -2, -3, -4);
-        let r: i32x4 = transmute(vqdmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_lane_s32() {
-        let a: i64x2 = i64x2::new(15, 18);
-        let b: i32x4 = i32x4::new(0, 1, 4, 5);
-        let c: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(-1, -2);
-        let r: i64x2 = transmute(vqdmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsl_high_laneq_s32() {
-        let a: i64x2 = i64x2::new(15, 18);
-        let b: i32x4 = i32x4::new(0, 1, 4, 5);
-        let c: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i64x2 = i64x2::new(-1, -2);
-        let r: i64x2 = transmute(vqdmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlslh_s16() {
-        let a: i32 = 10;
-        let b: i16 = 1;
-        let c: i16 = 2;
-        let e: i32 = 6;
-        let r: i32 = vqdmlslh_s16(a, b, c);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqdmlsls_s32() {
-        let a: i64 = 10;
-        let b: i32 = 1;
-        let c: i32 = 2;
-        let e: i64 = 6;
-        let r: i64 = vqdmlsls_s32(a, b, c);
-        assert_eq!(r, e);
-    }
-
#[simd_test(enable = "neon")] - unsafe fn test_vqdmlslh_lane_s16() { - let a: i32 = 10; - let b: i16 = 1; - let c: i16x4 = i16x4::new(2, 1, 1, 1); - let e: i32 = 6; - let r: i32 = vqdmlslh_lane_s16::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlslh_laneq_s16() { - let a: i32 = 10; - let b: i16 = 1; - let c: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1); - let e: i32 = 6; - let r: i32 = vqdmlslh_laneq_s16::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsls_lane_s32() { - let a: i64 = 10; - let b: i32 = 1; - let c: i32x2 = i32x2::new(2, 1); - let e: i64 = 6; - let r: i64 = vqdmlsls_lane_s32::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsls_laneq_s32() { - let a: i64 = 10; - let b: i32 = 1; - let c: i32x4 = i32x4::new(2, 1, 1, 1); - let e: i64 = 6; - let r: i64 = vqdmlsls_laneq_s32::<0>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhh_s16() { - let a: i16 = 1; - let b: i16 = 2; - let e: i16 = 0; - let r: i16 = vqdmulhh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhs_s32() { - let a: i32 = 1; - let b: i32 = 2; - let e: i32 = 0; - let r: i32 = vqdmulhs_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhh_lane_s16() { - let a: i16 = 2; - let b: i16x4 = i16x4::new(0, 0, 0x7F_FF, 0); - let e: i16 = 1; - let r: i16 = vqdmulhh_lane_s16::<2>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhh_laneq_s16() { - let a: i16 = 2; - let b: i16x8 = i16x8::new(0, 0, 0x7F_FF, 0, 0, 0, 0, 0); - let e: i16 = 1; - let r: i16 = vqdmulhh_laneq_s16::<2>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhs_lane_s32() { - let a: i32 = 2; - let b: i32x2 = i32x2::new(0, 0x7F_FF_FF_FF); - let e: i32 = 1; - let r: i32 = vqdmulhs_lane_s32::<1>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhs_laneq_s32() { - let a: i32 = 2; - let b: i32x4 = i32x4::new(0, 0x7F_FF_FF_FF, 0, 0); - let e: i32 = 1; - let r: i32 = vqdmulhs_laneq_s32::<1>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_lane_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x4 = i16x4::new(2, 1, 1, 1); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vqdmulh_lane_s16::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_lane_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x4 = i16x4::new(2, 1, 1, 1); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vqdmulhq_lane_s16::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_lane_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(2, 1); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vqdmulh_lane_s32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_lane_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(2, 1); - let e: i32x4 = 
i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vqdmulhq_lane_s32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovnh_s16() { - let a: i16 = 1; - let e: i8 = 1; - let r: i8 = vqmovnh_s16(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovns_s32() { - let a: i32 = 1; - let e: i16 = 1; - let r: i16 = vqmovns_s32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovnh_u16() { - let a: u16 = 1; - let e: u8 = 1; - let r: u8 = vqmovnh_u16(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovns_u32() { - let a: u32 = 1; - let e: u16 = 1; - let r: u16 = vqmovns_u32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovnd_s64() { - let a: i64 = 1; - let e: i32 = 1; - let r: i32 = vqmovnd_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovnd_u64() { - let a: u64 = 1; - let e: u32 = 1; - let r: u32 = vqmovnd_u64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_high_s16() { - let a: i8x8 = i8x8::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let e: i8x16 = i8x16::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F); - let r: i8x16 = transmute(vqmovn_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_high_s32() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let e: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let r: i16x8 = transmute(vqmovn_high_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_high_s64() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let r: i32x4 = transmute(vqmovn_high_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_high_u16() { - let a: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let b: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vqmovn_high_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_high_u32() { - let a: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vqmovn_high_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_high_u64() { - let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 
0xFF_FF_FF_FF); - let r: u32x4 = transmute(vqmovn_high_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovunh_s16() { - let a: i16 = 1; - let e: u8 = 1; - let r: u8 = vqmovunh_s16(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovuns_s32() { - let a: i32 = 1; - let e: u16 = 1; - let r: u16 = vqmovuns_s32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovund_s64() { - let a: i64 = 1; - let e: u32 = 1; - let r: u32 = vqmovund_s64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovun_high_s16() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let b: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vqmovun_high_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovun_high_s32() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let b: i32x4 = i32x4::new(-1, -1, -1, -1); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vqmovun_high_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovun_high_s64() { - let a: u32x2 = u32x2::new(0, 0); - let b: i64x2 = i64x2::new(-1, -1); - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vqmovun_high_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhh_s16() { - let a: i16 = 1; - let b: i16 = 2; - let e: i16 = 0; - let r: i16 = vqrdmulhh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhs_s32() { - let a: i32 = 1; - let b: i32 = 2; - let e: i32 = 0; - let r: i32 = vqrdmulhs_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhh_lane_s16() { - let a: i16 = 1; - let b: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16 = 0; - let r: i16 = vqrdmulhh_lane_s16::<1>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhh_laneq_s16() { - let a: i16 = 1; - let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16 = 0; - let r: i16 = vqrdmulhh_laneq_s16::<1>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhs_lane_s32() { - let a: i32 = 1; - let b: i32x2 = i32x2::new(0, 2); - let e: i32 = 0; - let r: i32 = vqrdmulhs_lane_s32::<1>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhs_laneq_s32() { - let a: i32 = 1; - let b: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32 = 0; - let r: i32 = vqrdmulhs_laneq_s32::<1>(a, transmute(b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlah_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(3, 3, 3, 3); - let r: i16x4 = transmute(vqrdmlah_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let r: i16x8 = 
transmute(vqrdmlahq_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlah_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(3, 3); - let r: i32x2 = transmute(vqrdmlah_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(3, 3, 3, 3); - let r: i32x4 = transmute(vqrdmlahq_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahh_s16() { - let a: i16 = 1; - let b: i16 = 1; - let c: i16 = 2; - let e: i16 = 1; - let r: i16 = vqrdmlahh_s16(a, b, c); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahs_s32() { - let a: i32 = 1; - let b: i32 = 1; - let c: i32 = 2; - let e: i32 = 1; - let r: i32 = vqrdmlahs_s32(a, b, c); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlah_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16x4 = i16x4::new(3, 3, 3, 3); - let r: i16x4 = transmute(vqrdmlah_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlah_laneq_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(3, 3, 3, 3); - let r: i16x4 = transmute(vqrdmlah_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahq_lane_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let r: i16x8 = transmute(vqrdmlahq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahq_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let r: i16x8 = transmute(vqrdmlahq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlah_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x2 = i32x2::new(0, 2); - let e: i32x2 = i32x2::new(3, 3); - let r: i32x2 = transmute(vqrdmlah_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlah_laneq_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32x2 = i32x2::new(3, 3); - let r: i32x2 = transmute(vqrdmlah_laneq_s32::<1>(transmute(a), 
transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahq_lane_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x2 = i32x2::new(0, 2); - let e: i32x4 = i32x4::new(3, 3, 3, 3); - let r: i32x4 = transmute(vqrdmlahq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahq_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32x4 = i32x4::new(3, 3, 3, 3); - let r: i32x4 = transmute(vqrdmlahq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahh_lane_s16() { - let a: i16 = 1; - let b: i16 = 1; - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16 = 1; - let r: i16 = vqrdmlahh_lane_s16::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahh_laneq_s16() { - let a: i16 = 1; - let b: i16 = 1; - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16 = 1; - let r: i16 = vqrdmlahh_laneq_s16::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahs_lane_s32() { - let a: i32 = 1; - let b: i32 = 1; - let c: i32x2 = i32x2::new(0, 2); - let e: i32 = 1; - let r: i32 = vqrdmlahs_lane_s32::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlahs_laneq_s32() { - let a: i32 = 1; - let b: i32 = 1; - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32 = 1; - let r: i32 = vqrdmlahs_laneq_s32::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlsh_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(-1, -1, -1, -1); - let r: i16x4 = transmute(vqrdmlsh_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1); - let r: i16x8 = transmute(vqrdmlshq_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlsh_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(-1, -1); - let r: i32x2 = transmute(vqrdmlsh_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(-1, -1, -1, -1); - let r: i32x4 = transmute(vqrdmlshq_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshh_s16() { - let a: i16 = 1; - let b: i16 = 1; - let c: i16 = 2; - let e: 
i16 = 1; - let r: i16 = vqrdmlshh_s16(a, b, c); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshs_s32() { - let a: i32 = 1; - let b: i32 = 1; - let c: i32 = 2; - let e: i32 = 1; - let r: i32 = vqrdmlshs_s32(a, b, c); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlsh_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16x4 = i16x4::new(-1, -1, -1, -1); - let r: i16x4 = transmute(vqrdmlsh_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlsh_laneq_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(-1, -1, -1, -1); - let r: i16x4 = transmute(vqrdmlsh_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshq_lane_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1); - let r: i16x8 = transmute(vqrdmlshq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshq_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1); - let r: i16x8 = transmute(vqrdmlshq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlsh_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x2 = i32x2::new(0, 2); - let e: i32x2 = i32x2::new(-1, -1); - let r: i32x2 = transmute(vqrdmlsh_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlsh_laneq_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32x2 = i32x2::new(-1, -1); - let r: i32x2 = transmute(vqrdmlsh_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshq_lane_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x2 = i32x2::new(0, 2); - let e: i32x4 = i32x4::new(-1, -1, -1, -1); - let r: i32x4 = transmute(vqrdmlshq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshq_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32x4 = i32x4::new(-1, -1, -1, -1); - let r: i32x4 = transmute(vqrdmlshq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn 
test_vqrdmlshh_lane_s16() { - let a: i16 = 1; - let b: i16 = 1; - let c: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16 = 1; - let r: i16 = vqrdmlshh_lane_s16::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshh_laneq_s16() { - let a: i16 = 1; - let b: i16 = 1; - let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16 = 1; - let r: i16 = vqrdmlshh_laneq_s16::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshs_lane_s32() { - let a: i32 = 1; - let b: i32 = 1; - let c: i32x2 = i32x2::new(0, 2); - let e: i32 = 1; - let r: i32 = vqrdmlshs_lane_s32::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "rdm")] - unsafe fn test_vqrdmlshs_laneq_s32() { - let a: i32 = 1; - let b: i32 = 1; - let c: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32 = 1; - let r: i32 = vqrdmlshs_laneq_s32::<1>(a, b, transmute(c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshls_s32() { - let a: i32 = 2; - let b: i32 = 2; - let e: i32 = 8; - let r: i32 = vqrshls_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshld_s64() { - let a: i64 = 2; - let b: i64 = 2; - let e: i64 = 8; - let r: i64 = vqrshld_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlb_s8() { - let a: i8 = 1; - let b: i8 = 2; - let e: i8 = 4; - let r: i8 = vqrshlb_s8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlh_s16() { - let a: i16 = 1; - let b: i16 = 2; - let e: i16 = 4; - let r: i16 = vqrshlh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshls_u32() { - let a: u32 = 2; - let b: i32 = 2; - let e: u32 = 8; - let r: u32 = vqrshls_u32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshld_u64() { - let a: u64 = 2; - let b: i64 = 2; - let e: u64 = 8; - let r: u64 = vqrshld_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlb_u8() { - let a: u8 = 1; - let b: i8 = 2; - let e: u8 = 4; - let r: u8 = vqrshlb_u8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlh_u16() { - let a: u16 = 1; - let b: i16 = 2; - let e: u16 = 4; - let r: u16 = vqrshlh_u16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrnh_n_s16() { - let a: i16 = 4; - let e: i8 = 1; - let r: i8 = vqrshrnh_n_s16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrns_n_s32() { - let a: i32 = 4; - let e: i16 = 1; - let r: i16 = vqrshrns_n_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrnd_n_s64() { - let a: i64 = 4; - let e: i32 = 1; - let r: i32 = vqrshrnd_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_high_n_s16() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 6, 7); - let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15); - let r: i8x16 = transmute(vqrshrn_high_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_high_n_s32() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(8, 12, 24, 28); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 6, 7); - let r: i16x8 = transmute(vqrshrn_high_n_s32::<2>(transmute(a), 
transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_high_n_s64() { - let a: i32x2 = i32x2::new(0, 1); - let b: i64x2 = i64x2::new(8, 12); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vqrshrn_high_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrnh_n_u16() { - let a: u16 = 4; - let e: u8 = 1; - let r: u8 = vqrshrnh_n_u16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrns_n_u32() { - let a: u32 = 4; - let e: u16 = 1; - let r: u16 = vqrshrns_n_u32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrnd_n_u64() { - let a: u64 = 4; - let e: u32 = 1; - let r: u32 = vqrshrnd_n_u64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_high_n_u16() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7); - let b: u16x8 = u16x8::new(8, 12, 24, 28, 48, 52, 56, 60); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15); - let r: u8x16 = transmute(vqrshrn_high_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_high_n_u32() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(8, 12, 24, 28); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7); - let r: u16x8 = transmute(vqrshrn_high_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_high_n_u64() { - let a: u32x2 = u32x2::new(0, 1); - let b: u64x2 = u64x2::new(8, 12); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vqrshrn_high_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrunh_n_s16() { - let a: i16 = 4; - let e: u8 = 1; - let r: u8 = vqrshrunh_n_s16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshruns_n_s32() { - let a: i32 = 4; - let e: u16 = 1; - let r: u16 = vqrshruns_n_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrund_n_s64() { - let a: i64 = 4; - let e: u32 = 1; - let r: u32 = vqrshrund_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrun_high_n_s16() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7); - let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15); - let r: u8x16 = transmute(vqrshrun_high_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrun_high_n_s32() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(8, 12, 24, 28); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7); - let r: u16x8 = transmute(vqrshrun_high_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrun_high_n_s64() { - let a: u32x2 = u32x2::new(0, 1); - let b: i64x2 = i64x2::new(8, 12); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vqrshrun_high_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshld_s64() { - let a: i64 = 0; - let b: i64 = 2; - let e: i64 = 0; - let r: i64 = vqshld_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlb_s8() { - 
let a: i8 = 1; - let b: i8 = 2; - let e: i8 = 4; - let r: i8 = vqshlb_s8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlh_s16() { - let a: i16 = 1; - let b: i16 = 2; - let e: i16 = 4; - let r: i16 = vqshlh_s16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshls_s32() { - let a: i32 = 1; - let b: i32 = 2; - let e: i32 = 4; - let r: i32 = vqshls_s32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshld_u64() { - let a: u64 = 0; - let b: i64 = 2; - let e: u64 = 0; - let r: u64 = vqshld_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlb_u8() { - let a: u8 = 1; - let b: i8 = 2; - let e: u8 = 4; - let r: u8 = vqshlb_u8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlh_u16() { - let a: u16 = 1; - let b: i16 = 2; - let e: u16 = 4; - let r: u16 = vqshlh_u16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshls_u32() { - let a: u32 = 1; - let b: i32 = 2; - let e: u32 = 4; - let r: u32 = vqshls_u32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlb_n_s8() { - let a: i8 = 1; - let e: i8 = 4; - let r: i8 = vqshlb_n_s8::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlh_n_s16() { - let a: i16 = 1; - let e: i16 = 4; - let r: i16 = vqshlh_n_s16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshls_n_s32() { - let a: i32 = 1; - let e: i32 = 4; - let r: i32 = vqshls_n_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshld_n_s64() { - let a: i64 = 1; - let e: i64 = 4; - let r: i64 = vqshld_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlb_n_u8() { - let a: u8 = 1; - let e: u8 = 4; - let r: u8 = vqshlb_n_u8::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlh_n_u16() { - let a: u16 = 1; - let e: u16 = 4; - let r: u16 = vqshlh_n_u16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshls_n_u32() { - let a: u32 = 1; - let e: u32 = 4; - let r: u32 = vqshls_n_u32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshld_n_u64() { - let a: u64 = 1; - let e: u64 = 4; - let r: u64 = vqshld_n_u64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlub_n_s8() { - let a: i8 = 1; - let e: u8 = 4; - let r: u8 = vqshlub_n_s8::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshluh_n_s16() { - let a: i16 = 1; - let e: u16 = 4; - let r: u16 = vqshluh_n_s16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlus_n_s32() { - let a: i32 = 1; - let e: u32 = 4; - let r: u32 = vqshlus_n_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlud_n_s64() { - let a: i64 = 1; - let e: u64 = 4; - let r: u64 = vqshlud_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrnd_n_s64() { - let a: i64 = 0; - let e: i32 = 0; - let r: i32 = vqshrnd_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrnh_n_s16() { - let a: i16 = 4; - let e: i8 = 1; - let r: i8 = vqshrnh_n_s16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrns_n_s32() { - let a: i32 = 4; - 
let e: i16 = 1; - let r: i16 = vqshrns_n_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_high_n_s16() { - let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60); - let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vqshrn_high_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_high_n_s32() { - let a: i16x4 = i16x4::new(0, 1, 8, 9); - let b: i32x4 = i32x4::new(32, 36, 40, 44); - let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let r: i16x8 = transmute(vqshrn_high_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_high_n_s64() { - let a: i32x2 = i32x2::new(0, 1); - let b: i64x2 = i64x2::new(32, 36); - let e: i32x4 = i32x4::new(0, 1, 8, 9); - let r: i32x4 = transmute(vqshrn_high_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrnd_n_u64() { - let a: u64 = 0; - let e: u32 = 0; - let r: u32 = vqshrnd_n_u64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrnh_n_u16() { - let a: u16 = 4; - let e: u8 = 1; - let r: u8 = vqshrnh_n_u16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrns_n_u32() { - let a: u32 = 4; - let e: u16 = 1; - let r: u16 = vqshrns_n_u32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_high_n_u16() { - let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60); - let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); - let r: u8x16 = transmute(vqshrn_high_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_high_n_u32() { - let a: u16x4 = u16x4::new(0, 1, 8, 9); - let b: u32x4 = u32x4::new(32, 36, 40, 44); - let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let r: u16x8 = transmute(vqshrn_high_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_high_n_u64() { - let a: u32x2 = u32x2::new(0, 1); - let b: u64x2 = u64x2::new(32, 36); - let e: u32x4 = u32x4::new(0, 1, 8, 9); - let r: u32x4 = transmute(vqshrn_high_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrunh_n_s16() { - let a: i16 = 4; - let e: u8 = 1; - let r: u8 = vqshrunh_n_s16::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshruns_n_s32() { - let a: i32 = 4; - let e: u16 = 1; - let r: u16 = vqshruns_n_s32::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrund_n_s64() { - let a: i64 = 4; - let e: u32 = 1; - let r: u32 = vqshrund_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrun_high_n_s16() { - let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60); - let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); - let r: u8x16 = transmute(vqshrun_high_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrun_high_n_s32() { - let a: u16x4 = u16x4::new(0, 1, 8, 9); - let b: 
i32x4 = i32x4::new(32, 36, 40, 44); - let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let r: u16x8 = transmute(vqshrun_high_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrun_high_n_s64() { - let a: u32x2 = u32x2::new(0, 1); - let b: i64x2 = i64x2::new(32, 36); - let e: u32x4 = u32x4::new(0, 1, 8, 9); - let r: u32x4 = transmute(vqshrun_high_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqaddb_u8() { - let a: u8 = 2; - let b: i8 = 2; - let e: u8 = 4; - let r: u8 = vsqaddb_u8(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqaddh_u16() { - let a: u16 = 2; - let b: i16 = 2; - let e: u16 = 4; - let r: u16 = vsqaddh_u16(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqadds_u32() { - let a: u32 = 2; - let b: i32 = 2; - let e: u32 = 4; - let r: u32 = vsqadds_u32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqaddd_u64() { - let a: u64 = 2; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = vsqaddd_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqrt_f32() { - let a: f32x2 = f32x2::new(4.0, 9.0); - let e: f32x2 = f32x2::new(2.0, 3.0); - let r: f32x2 = transmute(vsqrt_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqrtq_f32() { - let a: f32x4 = f32x4::new(4.0, 9.0, 16.0, 25.0); - let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0); - let r: f32x4 = transmute(vsqrtq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqrt_f64() { - let a: f64 = 4.0; - let e: f64 = 2.0; - let r: f64 = transmute(vsqrt_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsqrtq_f64() { - let a: f64x2 = f64x2::new(4.0, 9.0); - let e: f64x2 = f64x2::new(2.0, 3.0); - let r: f64x2 = transmute(vsqrtq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrte_f64() { - let a: f64 = 1.0; - let e: f64 = 0.998046875; - let r: f64 = transmute(vrsqrte_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrteq_f64() { - let a: f64x2 = f64x2::new(1.0, 2.0); - let e: f64x2 = f64x2::new(0.998046875, 0.705078125); - let r: f64x2 = transmute(vrsqrteq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrtes_f32() { - let a: f32 = 1.0; - let e: f32 = 0.998046875; - let r: f32 = vrsqrtes_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrted_f64() { - let a: f64 = 1.0; - let e: f64 = 0.998046875; - let r: f64 = vrsqrted_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrts_f64() { - let a: f64 = 1.0; - let b: f64 = 1.0; - let e: f64 = 1.; - let r: f64 = transmute(vrsqrts_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrtsq_f64() { - let a: f64x2 = f64x2::new(1.0, 2.0); - let b: f64x2 = f64x2::new(1.0, 2.0); - let e: f64x2 = f64x2::new(1., -0.5); - let r: f64x2 = transmute(vrsqrtsq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrtss_f32() { - let a: f32 = 1.0; - let b: f32 = 1.0; - let e: f32 = 1.; - let r: f32 = vrsqrtss_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = 
"neon")] - unsafe fn test_vrsqrtsd_f64() { - let a: f64 = 1.0; - let b: f64 = 1.0; - let e: f64 = 1.; - let r: f64 = vrsqrtsd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpe_f64() { - let a: f64 = 4.0; - let e: f64 = 0.24951171875; - let r: f64 = transmute(vrecpe_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpeq_f64() { - let a: f64x2 = f64x2::new(4.0, 3.0); - let e: f64x2 = f64x2::new(0.24951171875, 0.3330078125); - let r: f64x2 = transmute(vrecpeq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpes_f32() { - let a: f32 = 4.0; - let e: f32 = 0.24951171875; - let r: f32 = vrecpes_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecped_f64() { - let a: f64 = 4.0; - let e: f64 = 0.24951171875; - let r: f64 = vrecped_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecps_f64() { - let a: f64 = 4.0; - let b: f64 = 4.0; - let e: f64 = -14.; - let r: f64 = transmute(vrecps_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpsq_f64() { - let a: f64x2 = f64x2::new(4.0, 3.0); - let b: f64x2 = f64x2::new(4.0, 3.0); - let e: f64x2 = f64x2::new(-14., -7.); - let r: f64x2 = transmute(vrecpsq_f64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpss_f32() { - let a: f32 = 4.0; - let b: f32 = 4.0; - let e: f32 = -14.; - let r: f32 = vrecpss_f32(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpsd_f64() { - let a: f64 = 4.0; - let b: f64 = 4.0; - let e: f64 = -14.; - let r: f64 = vrecpsd_f64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpxs_f32() { - let a: f32 = 4.0; - let e: f32 = 0.5; - let r: f32 = vrecpxs_f32(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpxd_f64() { - let a: f64 = 4.0; - let e: f64 = 0.5; - let r: f64 = vrecpxd_f64(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_p64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_p64() { - let a: i64x1 = i64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_s64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_u64() { - let a: u64x1 = u64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
unsafe fn test_vreinterpretq_p64_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_f64() { - let a: f64 = 0.; - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_s8_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_f64() { - let a: f64 = 0.; - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_s16_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_f64() { - let a: f64 = 0.; - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vreinterpret_s32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_f64() { - let a: f64 = 0.; - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_s8_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_s16_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i32x4 = i32x4::new(0, 0, 0, 0); - let r: i32x4 = transmute(vreinterpretq_s32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i64x2 = i64x2::new(0, 0); - let r: i64x2 = transmute(vreinterpretq_s64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_f64() { - let a: f64 = 0.; - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vreinterpret_u8_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_f64() { - let a: f64 = 0.; - let e: u16x4 = u16x4::new(0, 0, 0, 0); - let r: u16x4 = transmute(vreinterpret_u16_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_f64() { - let a: f64 = 0.; - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vreinterpret_u32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_f64() { - let a: f64 = 0.; - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vreinterpretq_u16_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vreinterpretq_u16_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vreinterpretq_u32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vreinterpretq_u64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_f64() { - let a: f64 = 0.; - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p16_f64() { - let a: f64 = 0.; - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_p16_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p64_f64() { - let a: f64 = 0.; - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: i64x2 = i64x2::new(0, 0); - let r: i64x2 = transmute(vreinterpretq_p64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p64_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: i64x2 = i64x2::new(0, 0); - let r: i64x2 = transmute(vreinterpretq_p64_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p128_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_f64(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_s32() { - let a: i32x2 = i32x2::new(0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vreinterpret_f64_s64() { - let a: i64x1 = i64x1::new(0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_s32() { - let a: i32x4 = i32x4::new(0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_s64() { - let a: i64x2 = i64x2::new(0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_u32() { - let a: u32x2 = u32x2::new(0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_u64() { - let a: u64x1 = u64x1::new(0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_u32() { - let a: u32x4 = u32x4::new(0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_u64() { - let a: u64x2 = u64x2::new(0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable 
= "neon")] - unsafe fn test_vreinterpret_f64_p64() { - let a: i64x1 = i64x1::new(0); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_p64() { - let a: i64x1 = i64x1::new(0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_p64() { - let a: i64x2 = i64x2::new(0, 0); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_p64() { - let a: i64x2 = i64x2::new(0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_p128() { - let a: p128 = 0; - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f64_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: f64 = 0.; - let r: f64 = transmute(vreinterpret_f64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_f64() { - let a: f64 = 0.; - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f64_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: f64x2 = f64x2::new(0., 0.); - let r: f64x2 = transmute(vreinterpretq_f64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_f64() { - let a: f64x2 = f64x2::new(0., 0.); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshld_s64() { - let a: i64 = 1; - let b: i64 = 2; - let e: i64 = 4; - let r: i64 = vrshld_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshld_u64() { - let a: u64 = 1; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = vrshld_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrd_n_s64() { - let a: i64 = 4; - let e: i64 = 1; - let r: i64 = vrshrd_n_s64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrd_n_u64() { - let a: u64 = 4; - let e: u64 = 1; - let r: u64 = vrshrd_n_u64::<2>(a); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_high_n_s16() { - let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60); - let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); - let r: 
i8x16 = transmute(vrshrn_high_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_high_n_s32() { - let a: i16x4 = i16x4::new(0, 1, 8, 9); - let b: i32x4 = i32x4::new(32, 36, 40, 44); - let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let r: i16x8 = transmute(vrshrn_high_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_high_n_s64() { - let a: i32x2 = i32x2::new(0, 1); - let b: i64x2 = i64x2::new(32, 36); - let e: i32x4 = i32x4::new(0, 1, 8, 9); - let r: i32x4 = transmute(vrshrn_high_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_high_n_u16() { - let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60); - let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); - let r: u8x16 = transmute(vrshrn_high_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_high_n_u32() { - let a: u16x4 = u16x4::new(0, 1, 8, 9); - let b: u32x4 = u32x4::new(32, 36, 40, 44); - let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11); - let r: u16x8 = transmute(vrshrn_high_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_high_n_u64() { - let a: u32x2 = u32x2::new(0, 1); - let b: u64x2 = u64x2::new(32, 36); - let e: u32x4 = u32x4::new(0, 1, 8, 9); - let r: u32x4 = transmute(vrshrn_high_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsrad_n_s64() { - let a: i64 = 1; - let b: i64 = 4; - let e: i64 = 2; - let r: i64 = vrsrad_n_s64::<2>(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsrad_n_u64() { - let a: u64 = 1; - let b: u64 = 4; - let e: u64 = 2; - let r: u64 = vrsrad_n_u64::<2>(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_high_s16() { - let a: i8x8 = i8x8::new(1, 2, 0, 0, 0, 0, 0, 0); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let c: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x16 = i8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vrsubhn_high_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_high_s32() { - let a: i16x4 = i16x4::new(1, 2, 0, 0); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let c: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i16x8 = i16x8::new(1, 2, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vrsubhn_high_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_high_s64() { - let a: i32x2 = i32x2::new(1, 2); - let b: i64x2 = i64x2::new(1, 2); - let c: i64x2 = i64x2::new(1, 2); - let e: i32x4 = i32x4::new(1, 2, 0, 0); - let r: i32x4 = transmute(vrsubhn_high_s64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_high_u16() { - let a: u8x8 = u8x8::new(1, 2, 0, 0, 0, 0, 0, 0); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let c: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x16 = u8x16::new(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vrsubhn_high_u16(transmute(a), transmute(b), 
transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_high_u32() { - let a: u16x4 = u16x4::new(1, 2, 0, 0); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let c: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u16x8 = u16x8::new(1, 2, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vrsubhn_high_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_high_u64() { - let a: u32x2 = u32x2::new(1, 2); - let b: u64x2 = u64x2::new(1, 2); - let c: u64x2 = u64x2::new(1, 2); - let e: u32x4 = u32x4::new(1, 2, 0, 0); - let r: u32x4 = transmute(vrsubhn_high_u64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_f64() { - let a: f64 = 1.; - let b: f64 = 0.; - let e: f64 = 1.; - let r: f64 = transmute(vset_lane_f64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_f64() { - let a: f64 = 1.; - let b: f64x2 = f64x2::new(0., 2.); - let e: f64x2 = f64x2::new(1., 2.); - let r: f64x2 = transmute(vsetq_lane_f64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshld_s64() { - let a: i64 = 1; - let b: i64 = 2; - let e: i64 = 4; - let r: i64 = vshld_s64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshld_u64() { - let a: u64 = 1; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = vshld_u64(a, b); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshll_high_n_s8() { - let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i16x8 = transmute(vshll_high_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshll_high_n_s16() { - let a: i16x8 = i16x8::new(0, 0, 1, 2, 1, 2, 3, 4); - let e: i32x4 = i32x4::new(4, 8, 12, 16); - let r: i32x4 = transmute(vshll_high_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshll_high_n_s32() { - let a: i32x4 = i32x4::new(0, 0, 1, 2); - let e: i64x2 = i64x2::new(4, 8); - let r: i64x2 = transmute(vshll_high_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshll_high_n_u8() { - let a: u8x16 = u8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u16x8 = transmute(vshll_high_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshll_high_n_u16() { - let a: u16x8 = u16x8::new(0, 0, 1, 2, 1, 2, 3, 4); - let e: u32x4 = u32x4::new(4, 8, 12, 16); - let r: u32x4 = transmute(vshll_high_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshll_high_n_u32() { - let a: u32x4 = u32x4::new(0, 0, 1, 2); - let e: u64x2 = u64x2::new(4, 8); - let r: u64x2 = transmute(vshll_high_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_high_n_s16() { - let a: i8x8 = i8x8::new(1, 2, 5, 6, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(20, 24, 28, 32, 52, 56, 60, 64); - let e: i8x16 = i8x16::new(1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16); - let r: i8x16 = transmute(vshrn_high_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vshrn_high_n_s32() { - let a: i16x4 = i16x4::new(1, 2, 5, 6); - let b: i32x4 = i32x4::new(20, 24, 28, 32); - let e: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8); - let r: i16x8 = transmute(vshrn_high_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_high_n_s64() { - let a: i32x2 = i32x2::new(1, 2); - let b: i64x2 = i64x2::new(20, 24); - let e: i32x4 = i32x4::new(1, 2, 5, 6); - let r: i32x4 = transmute(vshrn_high_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_high_n_u16() { - let a: u8x8 = u8x8::new(1, 2, 5, 6, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(20, 24, 28, 32, 52, 56, 60, 64); - let e: u8x16 = u8x16::new(1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16); - let r: u8x16 = transmute(vshrn_high_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_high_n_u32() { - let a: u16x4 = u16x4::new(1, 2, 5, 6); - let b: u32x4 = u32x4::new(20, 24, 28, 32); - let e: u16x8 = u16x8::new(1, 2, 5, 6, 5, 6, 7, 8); - let r: u16x8 = transmute(vshrn_high_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_high_n_u64() { - let a: u32x2 = u32x2::new(1, 2); - let b: u64x2 = u64x2::new(20, 24); - let e: u32x4 = u32x4::new(1, 2, 5, 6); - let r: u32x4 = transmute(vshrn_high_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sm4")] - unsafe fn test_vsm3partw1q_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let c: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(2147549312, 3221323968, 131329, 2684362752); - let r: u32x4 = transmute(vsm3partw1q_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sm4")] - unsafe fn test_vsm3partw2q_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let c: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(128, 256, 384, 1077977696); - let r: u32x4 = transmute(vsm3partw2q_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sm4")] - unsafe fn test_vsm3ss1q_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let c: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0, 0, 0, 2098176); - let r: u32x4 = transmute(vsm3ss1q_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sm4")] - unsafe fn test_vsm4ekeyq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(1784948604, 136020997, 2940231695, 3789947679); - let r: u32x4 = transmute(vsm4ekeyq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sm4")] - unsafe fn test_vsm4eq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(1093874472, 3616769504, 3878330411, 2765298765); - let r: u32x4 = transmute(vsm4eq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vrax1q_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(3, 4); - let e: u64x2 = u64x2::new(7, 10); - let r: u64x2 = transmute(vrax1q_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon,sha3")] - unsafe fn test_vsha512hq_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(3, 4); - let c: u64x2 = u64x2::new(5, 6); - let e: u64x2 = u64x2::new(11189044327219203, 7177611956453380); - let r: u64x2 = transmute(vsha512hq_u64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vsha512h2q_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(3, 4); - let c: u64x2 = u64x2::new(5, 6); - let e: u64x2 = u64x2::new(5770237651009406214, 349133864969); - let r: u64x2 = transmute(vsha512h2q_u64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vsha512su0q_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(3, 4); - let e: u64x2 = u64x2::new(144115188075855874, 9439544818968559619); - let r: u64x2 = transmute(vsha512su0q_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,sha3")] - unsafe fn test_vsha512su1q_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: u64x2 = u64x2::new(3, 4); - let c: u64x2 = u64x2::new(5, 6); - let e: u64x2 = u64x2::new(105553116266526, 140737488355368); - let r: u64x2 = transmute(vsha512su1q_u64(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32x_f32() { - let a: f32x2 = f32x2::new(-1.5, 2.9); - let e: f32x2 = f32x2::new(-2.0, 3.0); - let r: f32x2 = transmute(vrnd32x_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32xq_f32() { - let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5); - let e: f32x4 = f32x4::new(-2.0, 3.0, 2.0, -2.0); - let r: f32x4 = transmute(vrnd32xq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32xq_f64() { - let a: f64x2 = f64x2::new(-1.5, 2.9); - let e: f64x2 = f64x2::new(-2.0, 3.0); - let r: f64x2 = transmute(vrnd32xq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(1.5, -2.5); - let e: f64x2 = f64x2::new(2.0, -2.0); - let r: f64x2 = transmute(vrnd32xq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(2147483647.499999762, 2147483647.5); - let e: f64x2 = f64x2::new(2147483647.0, -2147483648.0); - let r: f64x2 = transmute(vrnd32xq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(-2147483647.499999762, -2147483648.500000477); - let e: f64x2 = f64x2::new(-2147483647.0, -2147483648.0); - let r: f64x2 = transmute(vrnd32xq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32x_f64() { - let a: f64 = -1.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 1.5; - let e: f64 = 2.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 2147483647.499999762; - let e: f64 = 2147483647.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = -2147483647.499999762; - let e: f64 = -2147483647.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 2.9; - let e: f64 = 3.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = -2.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 2147483647.5; - let e: f64 = -2147483648.0; - 
let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = -2147483648.500000477; - let e: f64 = -2147483648.0; - let r: f64 = transmute(vrnd32x_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32z_f32() { - let a: f32x2 = f32x2::new(-1.5, 2.9); - let e: f32x2 = f32x2::new(-1.0, 2.0); - let r: f32x2 = transmute(vrnd32z_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32zq_f32() { - let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5); - let e: f32x4 = f32x4::new(-1.0, 2.0, 1.0, -2.0); - let r: f32x4 = transmute(vrnd32zq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32zq_f64() { - let a: f64x2 = f64x2::new(-1.5, 2.9); - let e: f64x2 = f64x2::new(-1.0, 2.0); - let r: f64x2 = transmute(vrnd32zq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(1.5, -2.5); - let e: f64x2 = f64x2::new(1.0, -2.0); - let r: f64x2 = transmute(vrnd32zq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(2147483647.999999762, 2147483648.0); - let e: f64x2 = f64x2::new(2147483647.0, -2147483648.0); - let r: f64x2 = transmute(vrnd32zq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(-2147483647.999999762, -2147483649.0); - let e: f64x2 = f64x2::new(-2147483647.0, -2147483648.0); - let r: f64x2 = transmute(vrnd32zq_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd32z_f64() { - let a: f64 = -1.5; - let e: f64 = -1.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 1.5; - let e: f64 = 1.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 2147483647.999999762; - let e: f64 = 2147483647.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = -2147483647.999999762; - let e: f64 = -2147483647.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 2.9; - let e: f64 = 2.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = -2.5; - let e: f64 = -2.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = 2147483648.0; - let e: f64 = -2147483648.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64 = -2147483649.0; - let e: f64 = -2147483648.0; - let r: f64 = transmute(vrnd32z_f64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd64x_f32() { - let a: f32x2 = f32x2::new(-1.5, 2.9); - let e: f32x2 = f32x2::new(-2.0, 3.0); - let r: f32x2 = transmute(vrnd64x_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd64xq_f32() { - let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5); - let e: f32x4 = f32x4::new(-2.0, 3.0, 2.0, -2.0); - let r: f32x4 = transmute(vrnd64xq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,frintts")] - unsafe fn test_vrnd64xq_f64() { - let a: f64x2 = f64x2::new(-1.5, 2.9); - let e: f64x2 = f64x2::new(-2.0, 3.0); - let r: f64x2 = transmute(vrnd64xq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(1.5, -2.5); - let e: f64x2 = f64x2::new(2.0, -2.0); - let r: f64x2 = transmute(vrnd64xq_f64(transmute(a))); - assert_eq!(r, e); - - let a: f64x2 = f64x2::new(9223372036854774784.0, 
9223372036854775808.0);
-        let e: f64x2 = f64x2::new(9223372036854774784.0, -9223372036854775808.0);
-        let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854777856.0);
-        let e: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854775808.0);
-        let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,frintts")]
-    unsafe fn test_vrnd64x_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 1.5;
-        let e: f64 = 2.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 9223372036854774784.0;
-        let e: f64 = 9223372036854774784.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = -9223372036854775808.0;
-        let e: f64 = -9223372036854775808.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 2.9;
-        let e: f64 = 3.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = -2.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 9223372036854775808.0;
-        let e: f64 = -9223372036854775808.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = -9223372036854777856.0;
-        let e: f64 = -9223372036854775808.0;
-        let r: f64 = transmute(vrnd64x_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,frintts")]
-    unsafe fn test_vrnd64z_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 2.9);
-        let e: f32x2 = f32x2::new(-1.0, 2.0);
-        let r: f32x2 = transmute(vrnd64z_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,frintts")]
-    unsafe fn test_vrnd64zq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5);
-        let e: f32x4 = f32x4::new(-1.0, 2.0, 1.0, -2.0);
-        let r: f32x4 = transmute(vrnd64zq_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,frintts")]
-    unsafe fn test_vrnd64zq_f64() {
-        let a: f64x2 = f64x2::new(-1.5, 2.9);
-        let e: f64x2 = f64x2::new(-1.0, 2.0);
-        let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64x2 = f64x2::new(1.5, -2.5);
-        let e: f64x2 = f64x2::new(1.0, -2.0);
-        let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64x2 = f64x2::new(9223372036854774784.0, 9223372036854775808.0);
-        let e: f64x2 = f64x2::new(9223372036854774784.0, -9223372036854775808.0);
-        let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854777856.0);
-        let e: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854775808.0);
-        let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,frintts")]
-    unsafe fn test_vrnd64z_f64() {
-        let a: f64 = -1.5;
-        let e: f64 = -1.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 1.5;
-        let e: f64 = 1.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 9223372036854774784.0;
-        let e: f64 = 9223372036854774784.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = -9223372036854775808.0;
-        let e: f64 = -9223372036854775808.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 2.9;
-        let e: f64 = 2.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = -2.5;
-        let e: f64 = -2.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = 9223372036854775808.0;
-        let e: f64 = -9223372036854775808.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-
-        let a: f64 = -9223372036854777856.0;
-        let e: f64 = -9223372036854775808.0;
-        let r: f64 = transmute(vrnd64z_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_s8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i8x8 = i8x8::new(0, 1, 4, 5, 8, 9, 12, 13);
-        let r: i8x8 = transmute(vtrn1_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_s8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29);
-        let r: i8x16 = transmute(vtrn1q_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_s16() {
-        let a: i16x4 = i16x4::new(0, 2, 4, 6);
-        let b: i16x4 = i16x4::new(1, 3, 5, 7);
-        let e: i16x4 = i16x4::new(0, 1, 4, 5);
-        let r: i16x4 = transmute(vtrn1_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_s16() {
-        let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i16x8 = i16x8::new(0, 1, 4, 5, 8, 9, 12, 13);
-        let r: i16x8 = transmute(vtrn1q_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_s32() {
-        let a: i32x4 = i32x4::new(0, 2, 4, 6);
-        let b: i32x4 = i32x4::new(1, 3, 5, 7);
-        let e: i32x4 = i32x4::new(0, 1, 4, 5);
-        let r: i32x4 = transmute(vtrn1q_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_u8() {
-        let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: u8x8 = u8x8::new(0, 1, 4, 5, 8, 9, 12, 13);
-        let r: u8x8 = transmute(vtrn1_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_u8() {
-        let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: u8x16 = u8x16::new(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29);
-        let r: u8x16 = transmute(vtrn1q_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_u16() {
-        let a: u16x4 = u16x4::new(0, 2, 4, 6);
-        let b: u16x4 = u16x4::new(1, 3, 5, 7);
-        let e: u16x4 = u16x4::new(0, 1, 4, 5);
-        let r: u16x4 = transmute(vtrn1_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_u16() {
-        let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: u16x8 = u16x8::new(0, 1, 4, 5, 8, 9, 12, 13);
-        let r: u16x8 = transmute(vtrn1q_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_u32() {
-        let a: u32x4 = u32x4::new(0, 2, 4, 6);
-        let b: u32x4 = u32x4::new(1, 3, 5, 7);
-        let e: u32x4 = u32x4::new(0, 1, 4, 5);
-        let r: u32x4 = transmute(vtrn1q_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_p8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i8x8 = i8x8::new(0, 1, 4, 5, 8, 9, 12, 13);
-        let r: i8x8 = transmute(vtrn1_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_p8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29);
-        let r: i8x16 = transmute(vtrn1q_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_p16() {
-        let a: i16x4 = i16x4::new(0, 2, 4, 6);
-        let b: i16x4 = i16x4::new(1, 3, 5, 7);
-        let e: i16x4 = i16x4::new(0, 1, 4, 5);
-        let r: i16x4 = transmute(vtrn1_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_p16() {
-        let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i16x8 = i16x8::new(0, 1, 4, 5, 8, 9, 12, 13);
-        let r: i16x8 = transmute(vtrn1q_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_s32() {
-        let a: i32x2 = i32x2::new(0, 2);
-        let b: i32x2 = i32x2::new(1, 3);
-        let e: i32x2 = i32x2::new(0, 1);
-        let r: i32x2 = transmute(vtrn1_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_s64() {
-        let a: i64x2 = i64x2::new(0, 2);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vtrn1q_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_u32() {
-        let a: u32x2 = u32x2::new(0, 2);
-        let b: u32x2 = u32x2::new(1, 3);
-        let e: u32x2 = u32x2::new(0, 1);
-        let r: u32x2 = transmute(vtrn1_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_u64() {
-        let a: u64x2 = u64x2::new(0, 2);
-        let b: u64x2 = u64x2::new(1, 3);
-        let e: u64x2 = u64x2::new(0, 1);
-        let r: u64x2 = transmute(vtrn1q_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_p64() {
-        let a: i64x2 = i64x2::new(0, 2);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vtrn1q_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_f32() {
-        let a: f32x4 = f32x4::new(0., 2., 4., 6.);
-        let b: f32x4 = f32x4::new(1., 3., 5., 7.);
-        let e: f32x4 = f32x4::new(0., 1., 4., 5.);
-        let r: f32x4 = transmute(vtrn1q_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1_f32() {
-        let a: f32x2 = f32x2::new(0., 2.);
-        let b: f32x2 = f32x2::new(1., 3.);
-        let e: f32x2 = f32x2::new(0., 1.);
-        let r: f32x2 = transmute(vtrn1_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn1q_f64() {
-        let a: f64x2 = f64x2::new(0., 2.);
-        let b: f64x2 = f64x2::new(1., 3.);
-        let e: f64x2 = f64x2::new(0., 1.);
-        let r: f64x2 = transmute(vtrn1q_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
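All of the deleted `vtrn1` tests above (and the `vtrn2` tests that follow) encode one lane shuffle. For orientation, a rough portable model of the two transpose steps, written here by the editor with plain arrays rather than the NEON vector types; it is not part of the generated file:

```rust
// Sketch only: trn1 interleaves the even-indexed lanes of a and b,
// trn2 the odd-indexed ones.
fn trn1(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[2 * i] = a[2 * i]; // even lane from a
        r[2 * i + 1] = b[2 * i]; // matching even lane from b
    }
    r
}

fn trn2(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[2 * i] = a[2 * i + 1]; // odd lane from a
        r[2 * i + 1] = b[2 * i + 1]; // matching odd lane from b
    }
    r
}

fn main() {
    let a = [0, 2, 4, 6, 8, 10, 12, 14];
    let b = [1, 3, 5, 7, 9, 11, 13, 15];
    assert_eq!(trn1(a, b), [0, 1, 4, 5, 8, 9, 12, 13]); // as in test_vtrn1_s8
    assert_eq!(trn2(a, b), [2, 3, 6, 7, 10, 11, 14, 15]); // as in test_vtrn2_s8
}
```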
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_s8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i8x8 = i8x8::new(2, 3, 6, 7, 10, 11, 14, 15);
-        let r: i8x8 = transmute(vtrn2_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_s8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
-        let r: i8x16 = transmute(vtrn2q_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_s16() {
-        let a: i16x4 = i16x4::new(0, 2, 4, 6);
-        let b: i16x4 = i16x4::new(1, 3, 5, 7);
-        let e: i16x4 = i16x4::new(2, 3, 6, 7);
-        let r: i16x4 = transmute(vtrn2_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_s16() {
-        let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i16x8 = i16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
-        let r: i16x8 = transmute(vtrn2q_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_s32() {
-        let a: i32x4 = i32x4::new(0, 2, 4, 6);
-        let b: i32x4 = i32x4::new(1, 3, 5, 7);
-        let e: i32x4 = i32x4::new(2, 3, 6, 7);
-        let r: i32x4 = transmute(vtrn2q_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_u8() {
-        let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: u8x8 = u8x8::new(2, 3, 6, 7, 10, 11, 14, 15);
-        let r: u8x8 = transmute(vtrn2_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_u8() {
-        let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: u8x16 = u8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
-        let r: u8x16 = transmute(vtrn2q_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_u16() {
-        let a: u16x4 = u16x4::new(0, 2, 4, 6);
-        let b: u16x4 = u16x4::new(1, 3, 5, 7);
-        let e: u16x4 = u16x4::new(2, 3, 6, 7);
-        let r: u16x4 = transmute(vtrn2_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_u16() {
-        let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: u16x8 = u16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
-        let r: u16x8 = transmute(vtrn2q_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_u32() {
-        let a: u32x4 = u32x4::new(0, 2, 4, 6);
-        let b: u32x4 = u32x4::new(1, 3, 5, 7);
-        let e: u32x4 = u32x4::new(2, 3, 6, 7);
-        let r: u32x4 = transmute(vtrn2q_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_p8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i8x8 = i8x8::new(2, 3, 6, 7, 10, 11, 14, 15);
-        let r: i8x8 = transmute(vtrn2_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_p8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31);
-        let r: i8x16 = transmute(vtrn2q_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_p16() {
-        let a: i16x4 = i16x4::new(0, 2, 4, 6);
-        let b: i16x4 = i16x4::new(1, 3, 5, 7);
-        let e: i16x4 = i16x4::new(2, 3, 6, 7);
-        let r: i16x4 = transmute(vtrn2_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_p16() {
-        let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i16x8 = i16x8::new(2, 3, 6, 7, 10, 11, 14, 15);
-        let r: i16x8 = transmute(vtrn2q_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_s32() {
-        let a: i32x2 = i32x2::new(0, 2);
-        let b: i32x2 = i32x2::new(1, 3);
-        let e: i32x2 = i32x2::new(2, 3);
-        let r: i32x2 = transmute(vtrn2_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_s64() {
-        let a: i64x2 = i64x2::new(0, 2);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: i64x2 = i64x2::new(2, 3);
-        let r: i64x2 = transmute(vtrn2q_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_u32() {
-        let a: u32x2 = u32x2::new(0, 2);
-        let b: u32x2 = u32x2::new(1, 3);
-        let e: u32x2 = u32x2::new(2, 3);
-        let r: u32x2 = transmute(vtrn2_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_u64() {
-        let a: u64x2 = u64x2::new(0, 2);
-        let b: u64x2 = u64x2::new(1, 3);
-        let e: u64x2 = u64x2::new(2, 3);
-        let r: u64x2 = transmute(vtrn2q_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_p64() {
-        let a: i64x2 = i64x2::new(0, 2);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: i64x2 = i64x2::new(2, 3);
-        let r: i64x2 = transmute(vtrn2q_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_f32() {
-        let a: f32x4 = f32x4::new(0., 2., 4., 6.);
-        let b: f32x4 = f32x4::new(1., 3., 5., 7.);
-        let e: f32x4 = f32x4::new(2., 3., 6., 7.);
-        let r: f32x4 = transmute(vtrn2q_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2_f32() {
-        let a: f32x2 = f32x2::new(0., 2.);
-        let b: f32x2 = f32x2::new(1., 3.);
-        let e: f32x2 = f32x2::new(2., 3.);
-        let r: f32x2 = transmute(vtrn2_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vtrn2q_f64() {
-        let a: f64x2 = f64x2::new(0., 2.);
-        let b: f64x2 = f64x2::new(1., 3.);
-        let e: f64x2 = f64x2::new(2., 3.);
-        let r: f64x2 = transmute(vtrn2q_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_s8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i8x8 = transmute(vzip1_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_s8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r: i8x16 = transmute(vzip1q_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_s16() {
-        let a: i16x4 = i16x4::new(0, 2, 4, 6);
-        let b: i16x4 = i16x4::new(1, 3, 5, 7);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vzip1_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_s16() {
-        let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vzip1q_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_s32() {
-        let a: i32x2 = i32x2::new(0, 2);
-        let b: i32x2 = i32x2::new(1, 3);
-        let e: i32x2 = i32x2::new(0, 1);
-        let r: i32x2 = transmute(vzip1_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_s32() {
-        let a: i32x4 = i32x4::new(0, 2, 4, 6);
-        let b: i32x4 = i32x4::new(1, 3, 5, 7);
-        let e: i32x4 = i32x4::new(0, 1, 2, 3);
-        let r: i32x4 = transmute(vzip1q_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_s64() {
-        let a: i64x2 = i64x2::new(0, 2);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vzip1q_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_u8() {
-        let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u8x8 = transmute(vzip1_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_u8() {
-        let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r: u8x16 = transmute(vzip1q_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_u16() {
-        let a: u16x4 = u16x4::new(0, 2, 4, 6);
-        let b: u16x4 = u16x4::new(1, 3, 5, 7);
-        let e: u16x4 = u16x4::new(0, 1, 2, 3);
-        let r: u16x4 = transmute(vzip1_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_u16() {
-        let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u16x8 = transmute(vzip1q_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_u32() {
-        let a: u32x2 = u32x2::new(0, 2);
-        let b: u32x2 = u32x2::new(1, 3);
-        let e: u32x2 = u32x2::new(0, 1);
-        let r: u32x2 = transmute(vzip1_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_u32() {
-        let a: u32x4 = u32x4::new(0, 2, 4, 6);
-        let b: u32x4 = u32x4::new(1, 3, 5, 7);
-        let e: u32x4 = u32x4::new(0, 1, 2, 3);
-        let r: u32x4 = transmute(vzip1q_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_u64() {
-        let a: u64x2 = u64x2::new(0, 2);
-        let b: u64x2 = u64x2::new(1, 3);
-        let e: u64x2 = u64x2::new(0, 1);
-        let r: u64x2 = transmute(vzip1q_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_p8() {
-        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i8x8 = transmute(vzip1_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_p8() {
-        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r: i8x16 = transmute(vzip1q_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_p16() {
-        let a: i16x4 = i16x4::new(0, 2, 4, 6);
-        let b: i16x4 = i16x4::new(1, 3, 5, 7);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vzip1_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_p16() {
-        let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vzip1q_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_p64() {
-        let a: i64x2 = i64x2::new(0, 2);
-        let b: i64x2 = i64x2::new(1, 3);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vzip1q_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1_f32() {
-        let a: f32x2 = f32x2::new(0., 2.);
-        let b: f32x2 = f32x2::new(1., 3.);
-        let e: f32x2 = f32x2::new(0., 1.);
-        let r: f32x2 = transmute(vzip1_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_f32() {
-        let a: f32x4 = f32x4::new(0., 2., 4., 6.);
-        let b: f32x4 = f32x4::new(1., 3., 5., 7.);
-        let e: f32x4 = f32x4::new(0., 1., 2., 3.);
-        let r: f32x4 = transmute(vzip1q_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip1q_f64() {
-        let a: f64x2 = f64x2::new(0., 2.);
-        let b: f64x2 = f64x2::new(1., 3.);
-        let e: f64x2 = f64x2::new(0., 1.);
-        let r: f64x2 = transmute(vzip1q_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
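The `vzip1` tests above and the `vzip2` tests below check the two halves of the interleave. A portable model, again an editor's sketch rather than anything from the generated file: `zip1` interleaves the low halves of the two inputs, `zip2` the high halves.

```rust
// Sketch only: models ZIP1/ZIP2 on 8-lane vectors as plain arrays.
fn zip1(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[2 * i] = a[i]; // low half of a
        r[2 * i + 1] = b[i]; // low half of b
    }
    r
}

fn zip2(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[2 * i] = a[i + 4]; // high half of a
        r[2 * i + 1] = b[i + 4]; // high half of b
    }
    r
}

fn main() {
    let a = [0, 2, 4, 6, 8, 10, 12, 14];
    let b = [1, 3, 5, 7, 9, 11, 13, 15];
    assert_eq!(zip1(a, b), [0, 1, 2, 3, 4, 5, 6, 7]); // as in test_vzip1_s8
    assert_eq!(zip2(a, b), [8, 9, 10, 11, 12, 13, 14, 15]);
}
```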
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_s8() {
-        let a: i8x8 = i8x8::new(0, 16, 16, 18, 16, 18, 20, 22);
-        let b: i8x8 = i8x8::new(1, 17, 17, 19, 17, 19, 21, 23);
-        let e: i8x8 = i8x8::new(16, 17, 18, 19, 20, 21, 22, 23);
-        let r: i8x8 = transmute(vzip2_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_s8() {
-        let a: i8x16 = i8x16::new(0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-        let r: i8x16 = transmute(vzip2q_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_s16() {
-        let a: i16x4 = i16x4::new(0, 16, 16, 18);
-        let b: i16x4 = i16x4::new(1, 17, 17, 19);
-        let e: i16x4 = i16x4::new(16, 17, 18, 19);
-        let r: i16x4 = transmute(vzip2_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_s16() {
-        let a: i16x8 = i16x8::new(0, 16, 16, 18, 16, 18, 20, 22);
-        let b: i16x8 = i16x8::new(1, 17, 17, 19, 17, 19, 21, 23);
-        let e: i16x8 = i16x8::new(16, 17, 18, 19, 20, 21, 22, 23);
-        let r: i16x8 = transmute(vzip2q_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_s32() {
-        let a: i32x2 = i32x2::new(0, 16);
-        let b: i32x2 = i32x2::new(1, 17);
-        let e: i32x2 = i32x2::new(16, 17);
-        let r: i32x2 = transmute(vzip2_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_s32() {
-        let a: i32x4 = i32x4::new(0, 16, 16, 18);
-        let b: i32x4 = i32x4::new(1, 17, 17, 19);
-        let e: i32x4 = i32x4::new(16, 17, 18, 19);
-        let r: i32x4 = transmute(vzip2q_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_s64() {
-        let a: i64x2 = i64x2::new(0, 16);
-        let b: i64x2 = i64x2::new(1, 17);
-        let e: i64x2 = i64x2::new(16, 17);
-        let r: i64x2 = transmute(vzip2q_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_u8() {
-        let a: u8x8 = u8x8::new(0, 16, 16, 18, 16, 18, 20, 22);
-        let b: u8x8 = u8x8::new(1, 17, 17, 19, 17, 19, 21, 23);
-        let e: u8x8 = u8x8::new(16, 17, 18, 19, 20, 21, 22, 23);
-        let r: u8x8 = transmute(vzip2_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_u8() {
-        let a: u8x16 = u8x16::new(0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: u8x16 = u8x16::new(1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: u8x16 = u8x16::new(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-        let r: u8x16 = transmute(vzip2q_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_u16() {
-        let a: u16x4 = u16x4::new(0, 16, 16, 18);
-        let b: u16x4 = u16x4::new(1, 17, 17, 19);
-        let e: u16x4 = u16x4::new(16, 17, 18, 19);
-        let r: u16x4 = transmute(vzip2_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_u16() {
-        let a: u16x8 = u16x8::new(0, 16, 16, 18, 16, 18, 20, 22);
-        let b: u16x8 = u16x8::new(1, 17, 17, 19, 17, 19, 21, 23);
-        let e: u16x8 = u16x8::new(16, 17, 18, 19, 20, 21, 22, 23);
-        let r: u16x8 = transmute(vzip2q_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_u32() {
-        let a: u32x2 = u32x2::new(0, 16);
-        let b: u32x2 = u32x2::new(1, 17);
-        let e: u32x2 = u32x2::new(16, 17);
-        let r: u32x2 = transmute(vzip2_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_u32() {
-        let a: u32x4 = u32x4::new(0, 16, 16, 18);
-        let b: u32x4 = u32x4::new(1, 17, 17, 19);
-        let e: u32x4 = u32x4::new(16, 17, 18, 19);
-        let r: u32x4 = transmute(vzip2q_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_u64() {
-        let a: u64x2 = u64x2::new(0, 16);
-        let b: u64x2 = u64x2::new(1, 17);
-        let e: u64x2 = u64x2::new(16, 17);
-        let r: u64x2 = transmute(vzip2q_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_p8() {
-        let a: i8x8 = i8x8::new(0, 16, 16, 18, 16, 18, 20, 22);
-        let b: i8x8 = i8x8::new(1, 17, 17, 19, 17, 19, 21, 23);
-        let e: i8x8 = i8x8::new(16, 17, 18, 19, 20, 21, 22, 23);
-        let r: i8x8 = transmute(vzip2_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_p8() {
-        let a: i8x16 = i8x16::new(0, 16, 16, 18, 16, 18, 20, 22, 16, 18, 20, 22, 24, 26, 28, 30);
-        let b: i8x16 = i8x16::new(1, 17, 17, 19, 17, 19, 21, 23, 17, 19, 21, 23, 25, 27, 29, 31);
-        let e: i8x16 = i8x16::new(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
-        let r: i8x16 = transmute(vzip2q_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_p16() {
-        let a: i16x4 = i16x4::new(0, 16, 16, 18);
-        let b: i16x4 = i16x4::new(1, 17, 17, 19);
-        let e: i16x4 = i16x4::new(16, 17, 18, 19);
-        let r: i16x4 = transmute(vzip2_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_p16() {
-        let a: i16x8 = i16x8::new(0, 16, 16, 18, 16, 18, 20, 22);
-        let b: i16x8 = i16x8::new(1, 17, 17, 19, 17, 19, 21, 23);
-        let e: i16x8 = i16x8::new(16, 17, 18, 19, 20, 21, 22, 23);
-        let r: i16x8 = transmute(vzip2q_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_p64() {
-        let a: i64x2 = i64x2::new(0, 16);
-        let b: i64x2 = i64x2::new(1, 17);
-        let e: i64x2 = i64x2::new(16, 17);
-        let r: i64x2 = transmute(vzip2q_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2_f32() {
-        let a: f32x2 = f32x2::new(0., 8.);
-        let b: f32x2 = f32x2::new(1., 9.);
-        let e: f32x2 = f32x2::new(8., 9.);
-        let r: f32x2 = transmute(vzip2_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_f32() {
-        let a: f32x4 = f32x4::new(0., 8., 8., 10.);
-        let b: f32x4 = f32x4::new(1., 9., 9., 11.);
-        let e: f32x4 = f32x4::new(8., 9., 10., 11.);
-        let r: f32x4 = transmute(vzip2q_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vzip2q_f64() {
-        let a: f64x2 = f64x2::new(0., 8.);
-        let b: f64x2 = f64x2::new(1., 9.);
-        let e: f64x2 = f64x2::new(8., 9.);
-        let r: f64x2 = transmute(vzip2q_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_s8() {
-        let a: i8x8 = i8x8::new(1, 0, 2, 0, 2, 0, 3, 0);
-        let b: i8x8 = i8x8::new(2, 0, 3, 0, 7, 0, 8, 0);
-        let e: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 7, 8);
-        let r: i8x8 = transmute(vuzp1_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_s8() {
-        let a: i8x16 = i8x16::new(1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0);
-        let b: i8x16 = i8x16::new(2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0);
-        let e: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16);
-        let r: i8x16 = transmute(vuzp1q_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_s16() {
-        let a: i16x4 = i16x4::new(1, 0, 2, 0);
-        let b: i16x4 = i16x4::new(2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(1, 2, 2, 3);
-        let r: i16x4 = transmute(vuzp1_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_s16() {
-        let a: i16x8 = i16x8::new(1, 0, 2, 0, 2, 0, 3, 0);
-        let b: i16x8 = i16x8::new(2, 0, 3, 0, 7, 0, 8, 0);
-        let e: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 7, 8);
-        let r: i16x8 = transmute(vuzp1q_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_s32() {
-        let a: i32x4 = i32x4::new(1, 0, 2, 0);
-        let b: i32x4 = i32x4::new(2, 0, 3, 0);
-        let e: i32x4 = i32x4::new(1, 2, 2, 3);
-        let r: i32x4 = transmute(vuzp1q_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_u8() {
-        let a: u8x8 = u8x8::new(1, 0, 2, 0, 2, 0, 3, 0);
-        let b: u8x8 = u8x8::new(2, 0, 3, 0, 7, 0, 8, 0);
-        let e: u8x8 = u8x8::new(1, 2, 2, 3, 2, 3, 7, 8);
-        let r: u8x8 = transmute(vuzp1_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_u8() {
-        let a: u8x16 = u8x16::new(1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0);
-        let b: u8x16 = u8x16::new(2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0);
-        let e: u8x16 = u8x16::new(1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16);
-        let r: u8x16 = transmute(vuzp1q_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_u16() {
-        let a: u16x4 = u16x4::new(1, 0, 2, 0);
-        let b: u16x4 = u16x4::new(2, 0, 3, 0);
-        let e: u16x4 = u16x4::new(1, 2, 2, 3);
-        let r: u16x4 = transmute(vuzp1_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_u16() {
-        let a: u16x8 = u16x8::new(1, 0, 2, 0, 2, 0, 3, 0);
-        let b: u16x8 = u16x8::new(2, 0, 3, 0, 7, 0, 8, 0);
-        let e: u16x8 = u16x8::new(1, 2, 2, 3, 2, 3, 7, 8);
-        let r: u16x8 = transmute(vuzp1q_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_u32() {
-        let a: u32x4 = u32x4::new(1, 0, 2, 0);
-        let b: u32x4 = u32x4::new(2, 0, 3, 0);
-        let e: u32x4 = u32x4::new(1, 2, 2, 3);
-        let r: u32x4 = transmute(vuzp1q_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_p8() {
-        let a: i8x8 = i8x8::new(1, 0, 2, 0, 2, 0, 3, 0);
-        let b: i8x8 = i8x8::new(2, 0, 3, 0, 7, 0, 8, 0);
-        let e: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 7, 8);
-        let r: i8x8 = transmute(vuzp1_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_p8() {
-        let a: i8x16 = i8x16::new(1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 3, 0, 7, 0, 8, 0);
-        let b: i8x16 = i8x16::new(2, 0, 3, 0, 7, 0, 8, 0, 13, 0, 14, 0, 15, 0, 16, 0);
-        let e: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 7, 8, 2, 3, 7, 8, 13, 14, 15, 16);
-        let r: i8x16 = transmute(vuzp1q_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_p16() {
-        let a: i16x4 = i16x4::new(1, 0, 2, 0);
-        let b: i16x4 = i16x4::new(2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(1, 2, 2, 3);
-        let r: i16x4 = transmute(vuzp1_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_p16() {
-        let a: i16x8 = i16x8::new(1, 0, 2, 0, 2, 0, 3, 0);
-        let b: i16x8 = i16x8::new(2, 0, 3, 0, 7, 0, 8, 0);
-        let e: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 7, 8);
-        let r: i16x8 = transmute(vuzp1q_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
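For completeness alongside the trn/zip sketches above, an editor's model of the unzip step these `vuzp1`/`vuzp2` tests exercise: `uzp1` gathers the even-indexed lanes of the concatenation of a and b, `uzp2` the odd-indexed ones.

```rust
// Sketch only: models UZP1/UZP2 on 8-lane vectors as plain arrays.
fn uzp1(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[i] = a[2 * i]; // even lanes of a
        r[i + 4] = b[2 * i]; // even lanes of b
    }
    r
}

fn uzp2(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    let mut r = [0i8; 8];
    for i in 0..4 {
        r[i] = a[2 * i + 1]; // odd lanes of a
        r[i + 4] = b[2 * i + 1]; // odd lanes of b
    }
    r
}

fn main() {
    let a = [1, 0, 2, 0, 2, 0, 3, 0];
    let b = [2, 0, 3, 0, 7, 0, 8, 0];
    assert_eq!(uzp1(a, b), [1, 2, 2, 3, 2, 3, 7, 8]); // as in test_vuzp1_s8
}
```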
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_s32() {
-        let a: i32x2 = i32x2::new(1, 0);
-        let b: i32x2 = i32x2::new(2, 0);
-        let e: i32x2 = i32x2::new(1, 2);
-        let r: i32x2 = transmute(vuzp1_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_s64() {
-        let a: i64x2 = i64x2::new(1, 0);
-        let b: i64x2 = i64x2::new(2, 0);
-        let e: i64x2 = i64x2::new(1, 2);
-        let r: i64x2 = transmute(vuzp1q_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_u32() {
-        let a: u32x2 = u32x2::new(1, 0);
-        let b: u32x2 = u32x2::new(2, 0);
-        let e: u32x2 = u32x2::new(1, 2);
-        let r: u32x2 = transmute(vuzp1_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_u64() {
-        let a: u64x2 = u64x2::new(1, 0);
-        let b: u64x2 = u64x2::new(2, 0);
-        let e: u64x2 = u64x2::new(1, 2);
-        let r: u64x2 = transmute(vuzp1q_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_p64() {
-        let a: i64x2 = i64x2::new(1, 0);
-        let b: i64x2 = i64x2::new(2, 0);
-        let e: i64x2 = i64x2::new(1, 2);
-        let r: i64x2 = transmute(vuzp1q_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_f32() {
-        let a: f32x4 = f32x4::new(0., 8., 1., 9.);
-        let b: f32x4 = f32x4::new(1., 10., 3., 11.);
-        let e: f32x4 = f32x4::new(0., 1., 1., 3.);
-        let r: f32x4 = transmute(vuzp1q_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1_f32() {
-        let a: f32x2 = f32x2::new(0., 8.);
-        let b: f32x2 = f32x2::new(1., 10.);
-        let e: f32x2 = f32x2::new(0., 1.);
-        let r: f32x2 = transmute(vuzp1_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp1q_f64() {
-        let a: f64x2 = f64x2::new(0., 8.);
-        let b: f64x2 = f64x2::new(1., 10.);
-        let e: f64x2 = f64x2::new(0., 1.);
-        let r: f64x2 = transmute(vuzp1q_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_s8() {
-        let a: i8x8 = i8x8::new(0, 17, 0, 18, 0, 18, 0, 19);
-        let b: i8x8 = i8x8::new(0, 18, 0, 19, 0, 23, 0, 24);
-        let e: i8x8 = i8x8::new(17, 18, 18, 19, 18, 19, 23, 24);
-        let r: i8x8 = transmute(vuzp2_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_s8() {
-        let a: i8x16 = i8x16::new(0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24);
-        let b: i8x16 = i8x16::new(0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32);
-        let e: i8x16 = i8x16::new(17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32);
-        let r: i8x16 = transmute(vuzp2q_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_s16() {
-        let a: i16x4 = i16x4::new(0, 17, 0, 18);
-        let b: i16x4 = i16x4::new(0, 18, 0, 19);
-        let e: i16x4 = i16x4::new(17, 18, 18, 19);
-        let r: i16x4 = transmute(vuzp2_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_s16() {
-        let a: i16x8 = i16x8::new(0, 17, 0, 18, 0, 18, 0, 19);
-        let b: i16x8 = i16x8::new(0, 18, 0, 19, 0, 23, 0, 24);
-        let e: i16x8 = i16x8::new(17, 18, 18, 19, 18, 19, 23, 24);
-        let r: i16x8 = transmute(vuzp2q_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_s32() {
-        let a: i32x4 = i32x4::new(0, 17, 0, 18);
-        let b: i32x4 = i32x4::new(0, 18, 0, 19);
-        let e: i32x4 = i32x4::new(17, 18, 18, 19);
-        let r: i32x4 = transmute(vuzp2q_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_u8() {
-        let a: u8x8 = u8x8::new(0, 17, 0, 18, 0, 18, 0, 19);
-        let b: u8x8 = u8x8::new(0, 18, 0, 19, 0, 23, 0, 24);
-        let e: u8x8 = u8x8::new(17, 18, 18, 19, 18, 19, 23, 24);
-        let r: u8x8 = transmute(vuzp2_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_u8() {
-        let a: u8x16 = u8x16::new(0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24);
-        let b: u8x16 = u8x16::new(0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32);
-        let e: u8x16 = u8x16::new(17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32);
-        let r: u8x16 = transmute(vuzp2q_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_u16() {
-        let a: u16x4 = u16x4::new(0, 17, 0, 18);
-        let b: u16x4 = u16x4::new(0, 18, 0, 19);
-        let e: u16x4 = u16x4::new(17, 18, 18, 19);
-        let r: u16x4 = transmute(vuzp2_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_u16() {
-        let a: u16x8 = u16x8::new(0, 17, 0, 18, 0, 18, 0, 19);
-        let b: u16x8 = u16x8::new(0, 18, 0, 19, 0, 23, 0, 24);
-        let e: u16x8 = u16x8::new(17, 18, 18, 19, 18, 19, 23, 24);
-        let r: u16x8 = transmute(vuzp2q_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_u32() {
-        let a: u32x4 = u32x4::new(0, 17, 0, 18);
-        let b: u32x4 = u32x4::new(0, 18, 0, 19);
-        let e: u32x4 = u32x4::new(17, 18, 18, 19);
-        let r: u32x4 = transmute(vuzp2q_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_p8() {
-        let a: i8x8 = i8x8::new(0, 17, 0, 18, 0, 18, 0, 19);
-        let b: i8x8 = i8x8::new(0, 18, 0, 19, 0, 23, 0, 24);
-        let e: i8x8 = i8x8::new(17, 18, 18, 19, 18, 19, 23, 24);
-        let r: i8x8 = transmute(vuzp2_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_p8() {
-        let a: i8x16 = i8x16::new(0, 17, 0, 18, 0, 18, 0, 19, 0, 18, 0, 19, 0, 23, 0, 24);
-        let b: i8x16 = i8x16::new(0, 18, 0, 19, 0, 23, 0, 24, 0, 29, 0, 30, 0, 31, 0, 32);
-        let e: i8x16 = i8x16::new(17, 18, 18, 19, 18, 19, 23, 24, 18, 19, 23, 24, 29, 30, 31, 32);
-        let r: i8x16 = transmute(vuzp2q_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_p16() {
-        let a: i16x4 = i16x4::new(0, 17, 0, 18);
-        let b: i16x4 = i16x4::new(0, 18, 0, 19);
-        let e: i16x4 = i16x4::new(17, 18, 18, 19);
-        let r: i16x4 = transmute(vuzp2_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_p16() {
-        let a: i16x8 = i16x8::new(0, 17, 0, 18, 0, 18, 0, 19);
-        let b: i16x8 = i16x8::new(0, 18, 0, 19, 0, 23, 0, 24);
-        let e: i16x8 = i16x8::new(17, 18, 18, 19, 18, 19, 23, 24);
-        let r: i16x8 = transmute(vuzp2q_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_s32() {
-        let a: i32x2 = i32x2::new(0, 17);
-        let b: i32x2 = i32x2::new(0, 18);
-        let e: i32x2 = i32x2::new(17, 18);
-        let r: i32x2 = transmute(vuzp2_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_s64() {
-        let a: i64x2 = i64x2::new(0, 17);
-        let b: i64x2 = i64x2::new(0, 18);
-        let e: i64x2 = i64x2::new(17, 18);
-        let r: i64x2 = transmute(vuzp2q_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_u32() {
-        let a: u32x2 = u32x2::new(0, 17);
-        let b: u32x2 = u32x2::new(0, 18);
-        let e: u32x2 = u32x2::new(17, 18);
-        let r: u32x2 = transmute(vuzp2_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_u64() {
-        let a: u64x2 = u64x2::new(0, 17);
-        let b: u64x2 = u64x2::new(0, 18);
-        let e: u64x2 = u64x2::new(17, 18);
-        let r: u64x2 = transmute(vuzp2q_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_p64() {
-        let a: i64x2 = i64x2::new(0, 17);
-        let b: i64x2 = i64x2::new(0, 18);
-        let e: i64x2 = i64x2::new(17, 18);
-        let r: i64x2 = transmute(vuzp2q_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_f32() {
-        let a: f32x4 = f32x4::new(0., 8., 1., 9.);
-        let b: f32x4 = f32x4::new(2., 9., 3., 11.);
-        let e: f32x4 = f32x4::new(8., 9., 9., 11.);
-        let r: f32x4 = transmute(vuzp2q_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2_f32() {
-        let a: f32x2 = f32x2::new(0., 8.);
-        let b: f32x2 = f32x2::new(2., 9.);
-        let e: f32x2 = f32x2::new(8., 9.);
-        let r: f32x2 = transmute(vuzp2_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuzp2q_f64() {
-        let a: f64x2 = f64x2::new(0., 8.);
-        let b: f64x2 = f64x2::new(2., 9.);
-        let e: f64x2 = f64x2::new(8., 9.);
-        let r: f64x2 = transmute(vuzp2q_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabal_high_u8() {
-        let a: u16x8 = u16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let c: u8x16 = u8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12);
-        let e: u16x8 = u16x8::new(20, 20, 20, 20, 20, 20, 20, 20);
-        let r: u16x8 = transmute(vabal_high_u8(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabal_high_u16() {
-        let a: u32x4 = u32x4::new(9, 10, 11, 12);
-        let b: u16x8 = u16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
-        let c: u16x8 = u16x8::new(10, 10, 10, 10, 20, 0, 2, 4);
-        let e: u32x4 = u32x4::new(20, 20, 20, 20);
-        let r: u32x4 = transmute(vabal_high_u16(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabal_high_u32() {
-        let a: u64x2 = u64x2::new(15, 16);
-        let b: u32x4 = u32x4::new(1, 2, 15, 16);
-        let c: u32x4 = u32x4::new(10, 10, 10, 12);
-        let e: u64x2 = u64x2::new(20, 20);
-        let r: u64x2 = transmute(vabal_high_u32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabal_high_s8() {
-        let a: i16x8 = i16x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let c: i8x16 = i8x16::new(10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12);
-        let e: i16x8 = i16x8::new(20, 20, 20, 20, 20, 20, 20, 20);
-        let r: i16x8 = transmute(vabal_high_s8(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabal_high_s16() {
-        let a: i32x4 = i32x4::new(9, 10, 11, 12);
-        let b: i16x8 = i16x8::new(1, 2, 3, 4, 9, 10, 11, 12);
-        let c: i16x8 = i16x8::new(10, 10, 10, 10, 20, 0, 2, 4);
-        let e: i32x4 = i32x4::new(20, 20, 20, 20);
-        let r: i32x4 = transmute(vabal_high_s16(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabal_high_s32() {
-        let a: i64x2 = i64x2::new(15, 16);
-        let b: i32x4 = i32x4::new(1, 2, 15, 16);
-        let c: i32x4 = i32x4::new(10, 10, 10, 12);
-        let e: i64x2 = i64x2::new(20, 20);
-        let r: i64x2 = transmute(vabal_high_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqabs_s64() {
-        let a: i64x1 = i64x1::new(-9223372036854775808);
-        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x1 = transmute(vqabs_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqabsq_s64() {
-        let a: i64x2 = i64x2::new(-9223372036854775808, -7);
-        let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 7);
-        let r: i64x2 = transmute(vqabsq_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqabsb_s8() {
-        let a: i8 = -7;
-        let e: i8 = 7;
-        let r: i8 = vqabsb_s8(a);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqabsh_s16() {
-        let a: i16 = -7;
-        let e: i16 = 7;
-        let r: i16 = vqabsh_s16(a);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqabss_s32() {
-        let a: i32 = -7;
-        let e: i32 = 7;
-        let r: i32 = vqabss_s32(a);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vqabsd_s64() {
-        let a: i64 = -7;
-        let e: i64 = 7;
-        let r: i64 = vqabsd_s64(a);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vslid_n_s64() {
-        let a: i64 = 333;
-        let b: i64 = 2042;
-        let e: i64 = 8169;
-        let r: i64 = vslid_n_s64::<2>(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vslid_n_u64() {
-        let a: u64 = 333;
-        let b: u64 = 2042;
-        let e: u64 = 8169;
-        let r: u64 = vslid_n_u64::<2>(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsrid_n_s64() {
-        let a: i64 = 333;
-        let b: i64 = 2042;
-        let e: i64 = 510;
-        let r: i64 = vsrid_n_s64::<2>(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsrid_n_u64() {
-        let a: u64 = 333;
-        let b: u64 = 2042;
-        let e: u64 = 510;
-        let r: u64 = vsrid_n_u64::<2>(a, b);
-        assert_eq!(r, e);
-    }
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+pub unsafe fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    simd_shuffle!(a, b, [1, 3])
 }
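The `vabal_high_*` tests removed above, and the `vabal_*` intrinsics added in the next file, both compute a widening "absolute difference and accumulate". A scalar model of the high-half u8 variant, written by the editor to match the test vectors above (not code from either generated file):

```rust
// Sketch only: r[i] = a[i] + |b[i + 8] - c[i + 8]|, widened from u8 to u16.
// The plain vabal_* forms do the same over the low halves of b and c.
fn vabal_high_u8_model(a: [u16; 8], b: [u8; 16], c: [u8; 16]) -> [u16; 8] {
    let mut r = [0u16; 8];
    for i in 0..8 {
        let d = b[i + 8].abs_diff(c[i + 8]) as u16; // widening |b - c|
        r[i] = a[i].wrapping_add(d);
    }
    r
}

fn main() {
    // Same vectors as test_vabal_high_u8: every lane accumulates to 20.
    let a = [9, 10, 11, 12, 13, 14, 15, 16];
    let b = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let c = [10, 10, 10, 10, 10, 10, 10, 10, 20, 0, 2, 4, 6, 8, 10, 12];
    assert_eq!(vabal_high_u8_model(a, b, c), [20; 8]);
}
```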
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index b8328887e6..e89067142b 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -1,3080 +1,4329 @@
 // This code is automatically generated. DO NOT MODIFY.
 //
-// Instead, modify `crates/stdarch-gen-arm/neon.spec` and run the following command to re-generate this file:
+// Instead, modify `crates/stdarch-gen2/spec/` and run the following command to re-generate this file:
 //
 // ```
-// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen-arm -- crates/stdarch-gen-arm/neon.spec
+// cargo run --bin=stdarch-gen2 -- crates/stdarch-gen2/spec
 // ```
-use super::*;
+#![allow(improper_ctypes)]
+
 #[cfg(test)]
 use stdarch_test::assert_instr;

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)
+#[doc = "Signed Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    let d: int8x8_t = vabd_s8(b, c);
+    let e: uint8x8_t = simd_cast(d);
+    simd_add(a, simd_cast(e))
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s8)
+#[doc = "Signed Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    let d: int16x4_t = vabd_s16(b, c);
+    let e: uint16x4_t = simd_cast(d);
+    simd_add(a, simd_cast(e))
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s16)
+#[doc = "Signed Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    let d: int32x2_t = vabd_s32(b, c);
+    let e: uint32x2_t = simd_cast(d);
+    simd_add(a, simd_cast(e))
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s16)
+#[doc = "Unsigned Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uabal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    let d: uint8x8_t = vabd_u8(b, c);
+    simd_add(a, simd_cast(d))
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s32)
+#[doc = "Unsigned Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uabal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    let d: uint16x4_t = vabd_u16(b, c);
+    simd_add(a, simd_cast(d))
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s32)
+#[doc = "Unsigned Absolute difference and Accumulate Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uabal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    let d: uint32x2_t = vabd_u32(b, c);
+    simd_add(a, simd_cast(d))
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u8)
+#[doc = "Absolute difference between the arguments of Floating"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fabd.v2f32"
+        )]
+        fn _vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vabd_f32(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u8)
+#[doc = "Absolute difference between the arguments of Floating"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fabd.v4f32"
+        )]
+        fn _vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vabdq_f32(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u16)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sabd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i8")]
+        fn _vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vabd_s8(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u16)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sabd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v16i8")]
+        fn _vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vabdq_s8(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u32)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sabd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i16")]
+        fn _vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vabd_s16(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u32)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sabd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i16")]
+        fn _vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vabdq_s16(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s64)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sabd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2i32")]
+        fn _vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vabd_s32(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s64)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vandq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sabd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i32")]
+        fn _vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vabdq_s32(a, b)
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u64)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vand_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    simd_and(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uabd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uabd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i8")]
+        fn _vabd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vabd_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }

-/// Vector bitwise and
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u64)
+#[doc = "Absolute difference between the arguments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u8)"]
+#[doc = "## Safety"]
+#[doc = 
" * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(and))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_and(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v16i8")] + fn _vabdq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vabdq_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8) +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i16")] + fn _vabd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vabd_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8) +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i16")] + fn _vabdq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vabdq_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16) +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v2i32")] + fn _vabd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vabd_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16) +#[doc = "Absolute difference between the arguments"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uabd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i32")] + fn _vabdq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vabdq_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32) +#[doc = "Signed Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let c: uint8x8_t = simd_cast(vabd_s8(a, b)); + simd_cast(c) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32) +#[doc = "Signed Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_s32(a: int32x4_t, b: 
int32x4_t) -> int32x4_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let c: uint16x4_t = simd_cast(vabd_s16(a, b)); + simd_cast(c) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8) +#[doc = "Signed Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let c: uint32x2_t = simd_cast(vabd_s32(a, b)); + simd_cast(c) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8) +#[doc = "Unsigned Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + simd_cast(vabd_u8(a, b)) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's 
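// Sketch for the widening form `vabdl_s8` defined above. It shows why the
// implementation routes through an unsigned cast before widening: the 8-bit
// absolute difference can exceed `i8::MAX`. (Illustrative only; the wrapper
// name is ours.)
#[cfg(target_arch = "aarch64")]
fn demo_vabdl_s8() {
    use core::arch::aarch64::*;
    // SAFETY: NEON is always available on AArch64 targets.
    unsafe {
        let a = vdup_n_s8(-100);
        let b = vdup_n_s8(100);
        // |(-100) - 100| = 200, representable only in the widened i16 lanes.
        let d = vabdl_s8(a, b);
        assert_eq!(vgetq_lane_s16::<0>(d), 200);
    }
}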
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16) +#[doc = "Unsigned Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + simd_cast(vabd_u16(a, b)) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16) +#[doc = "Unsigned Absolute difference Long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uabdl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + simd_cast(vabd_u32(a, b)) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32) +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabs_f32(a: float32x2_t) -> float32x2_t { + simd_fabs(a) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32) +#[doc = "Floating-point absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vabsq_f32(a: float32x4_t) -> float32x4_t { + simd_fabs(a) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64) +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vadd_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_xor(a, b) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64) +#[doc = "Bitwise 
exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vaddq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_xor(a, b) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64) +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vadd_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_xor(a, b) } -/// Vector bitwise or (immediate, inclusive) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64) +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(orr))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_or(a, b) +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vaddq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_xor(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8) +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { simd_xor(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8) +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { simd_xor(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16) +#[doc = "Bitwise exclusive OR"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vaddq_p128(a: p128, b: p128) -> p128 { + a ^ b } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_xor(a, b) -} - -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
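// Sketch for the `vand_*` family above, all of which lower to a lane-wise
// `simd_and` (illustrative only; the wrapper name is ours).
#[cfg(target_arch = "aarch64")]
fn demo_vand_s32() {
    use core::arch::aarch64::*;
    // SAFETY: NEON is always available on AArch64 targets.
    unsafe {
        let d = vand_s32(vdup_n_s32(0b1100), vdup_n_s32(0b1010));
        assert_eq!(vget_lane_s32::<0>(d), 0b1000); // lane-wise AND
    }
}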
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_and(a, b) } -/// Vector bitwise exclusive or (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(eor))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_xor(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_and(a, b) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s8) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sabd.v8i8")] - fn vabd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vabd_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_and(a, b) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s8) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sabd.v16i8")] - fn vabdq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vabdq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_and(a, b) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s16) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sabd.v4i16")] - fn vabd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vabd_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vand_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_and(a, b) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s16) +#[doc = "Vector bitwise and"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vandq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sabd.v8i16")] - fn vabdq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vabdq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(and) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_and(a, b) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_s32) +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sabd.v2i32")] - fn vabd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v2i32.v2f32" + )] + fn _vcage_f32(a: float32x2_t, b: float32x2_t) -> int32x2_t; } -vabd_s32_(a, b) + _vcage_f32(a, b).as_unsigned() } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_s32) +#[doc = "Floating-point absolute compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sabd.v4i32")] - fn vabdq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facge.v4i32.v4f32" + )] + fn _vcageq_f32(a: float32x4_t, b: float32x4_t) -> int32x4_t; } -vabdq_s32_(a, b) + _vcageq_f32(a, b).as_unsigned() } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u8) +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uabd.v8i8")] - fn vabd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32" + )] + fn _vcagt_f32(a: float32x2_t, b: float32x2_t) -> int32x2_t; } -vabd_u8_(a, b) + _vcagt_f32(a, b).as_unsigned() } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u8) +#[doc = "Floating-point absolute compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uabd.v16i8")] - fn vabdq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.facgt.v4i32.v4f32" + )] + fn _vcagtq_f32(a: float32x4_t, b: float32x4_t) -> int32x4_t; } -vabdq_u8_(a, b) + _vcagtq_f32(a, b).as_unsigned() } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u16) +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uabd.v4i16")] - fn vabd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vabd_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcale_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcage_f32(b, a) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u16) +#[doc = "Floating-point absolute compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uabd.v8i16")] - fn vabdq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vabdq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcageq_f32(b, a) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_u32) +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uabd.v2i32")] - fn vabd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vabd_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcagt_f32(b, a) } -/// Absolute difference between the arguments -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_u32) +#[doc = "Floating-point absolute compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabd))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uabd.v4i32")] - fn vabdq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vabdq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(facgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcagtq_f32(b, a) } -/// Absolute difference between the arguments of Floating -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f32) +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fabd.v2f32")] - fn vabd_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vabd_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceq_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_eq(a, b) } -/// Absolute difference between the arguments of Floating -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdq_f32) +#[doc = "Floating-point compare equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fabd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since 
= "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fabd.v4f32")] - fn vabdq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vabdq_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_eq(a, b) } -/// Unsigned Absolute difference Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabdl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - simd_cast(vabd_u8(a, b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceq_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_eq(a, b) } -/// Unsigned Absolute difference Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u16) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabdl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - simd_cast(vabd_u16(a, b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceqq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_eq(a, b) } -/// Unsigned Absolute difference Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_u32) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabdl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - simd_cast(vabd_u32(a, b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceq_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_eq(a, b) } -/// Signed Absolute difference Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabdl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - let c: uint8x8_t = simd_cast(vabd_s8(a, b)); - simd_cast(c) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceqq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_eq(a, b) } -/// Signed Absolute difference Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s16) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabdl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - let c: uint16x4_t = simd_cast(vabd_s16(a, b)); - simd_cast(c) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceq_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_eq(a, b) } -/// Signed Absolute difference Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_s32) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabdl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabdl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - let c: uint32x2_t = simd_cast(vabd_s32(a, b)); - simd_cast(c) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vceq_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vceqq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u16) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vceq_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u16) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vceqq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u32) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vceq_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u32) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vceqq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceq_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceq_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s8) +#[doc = "Compare bitwise Equal (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceqq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmeq) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { simd_eq(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s16) +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceq_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { - simd_eq(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_ge(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s16) +#[doc = "Floating-point compare greater than or equal"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceqq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { - simd_eq(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_ge(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_s32) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceq_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { - simd_eq(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_ge(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_s32) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { - simd_eq(a, b) +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgeq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_ge(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_p8) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceq_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { - simd_eq(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_ge(a, b) } -/// Compare bitwise Equal (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_p8) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { - simd_eq(a, b) -} - -/// Floating-point compare equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceq_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceq_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - simd_eq(a, b) -} - -/// Floating-point compare equal -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqq_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmeq))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - simd_eq(a, b) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { - let c: int8x8_t = simd_and(a, b); - let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { - let c: int8x16_t = simd_and(a, b); - let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { - let c: int16x4_t = simd_and(a, b); - let d: i16x4 = i16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { - let c: int16x8_t = simd_and(a, b); - let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { - let c: int32x2_t = simd_and(a, b); - let d: i32x2 = i32x2::new(0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { - let c: int32x4_t = simd_and(a, b); - let d: i32x4 = i32x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { - let c: poly8x8_t = simd_and(a, b); - let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) -} - -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { - let c: 
poly8x16_t = simd_and(a, b); - let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgeq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_ge(a, b) } -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { - let c: poly16x4_t = simd_and(a, b); - let d: i16x4 = i16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_ge(a, b) } -/// Signed compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16) +#[doc = "Compare signed greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { - let c: poly16x8_t = simd_and(a, b); - let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vcgeq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_ge(a, b) } -/// Unsigned compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - let c: uint8x8_t = simd_and(a, b); - let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_ge(a, b) } -/// Unsigned compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - let c: uint8x16_t = simd_and(a, b); - let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgeq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_ge(a, b) } -/// Unsigned compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
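// Editor's note (illustrative only; not part of the generated file or of
// this patch): the signed and unsigned >= comparisons lower to different
// AArch64 instructions (`cmge` vs `cmhs`) because the same bit pattern
// orders differently under the two interpretations. A small sketch,
// assuming an aarch64 target:
//
//     #[cfg(target_arch = "aarch64")]
//     unsafe fn signedness_demo() {
//         use core::arch::aarch64::{
//             vcge_s8, vcge_u8, vdup_n_s8, vdup_n_u8, vget_lane_u8,
//         };
//         // 0x80 is -128 when read as i8 but 128 when read as u8.
//         let s = vcge_s8(vdup_n_s8(-128), vdup_n_s8(0)); // -128 >= 0: false
//         let u = vcge_u8(vdup_n_u8(0x80), vdup_n_u8(0)); // 128 >= 0: true
//         assert_eq!(vget_lane_u8::<0>(s), 0);
//         assert_eq!(vget_lane_u8::<0>(u), u8::MAX);
//     }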
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - let c: uint16x4_t = simd_and(a, b); - let d: u16x4 = u16x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_ge(a, b) } -/// Unsigned compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - let c: uint16x8_t = simd_and(a, b); - let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgeq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_ge(a, b) } -/// Unsigned compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - let c: uint32x2_t = simd_and(a, 
b); - let d: u32x2 = u32x2::new(0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_ge(a, b) } -/// Unsigned compare bitwise Test bits nonzero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32) +#[doc = "Compare unsigned greater than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmtst))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - let c: uint32x4_t = simd_and(a, b); - let d: u32x4 = u32x4::new(0, 0, 0, 0); - simd_ne(c, transmute(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_ge(a, b) } -/// Floating-point absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f32) +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabs_f32(a: float32x2_t) -> float32x2_t { - simd_fabs(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_gt(a, b) } -/// Floating-point absolute value -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_f32) +#[doc = "Floating-point compare greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabsq_f32(a: float32x4_t) -> float32x4_t { - simd_fabs(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcgtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_gt(a, b) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s8) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { simd_gt(a, b) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s8) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature 
= "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgtq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { simd_gt(a, b) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s16) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { simd_gt(a, b) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s16) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgtq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { simd_gt(a, b) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s32) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { simd_gt(a, b) } -/// Compare signed greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s32) +#[doc = "Compare signed greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgtq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { simd_gt(a, b) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u8) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { simd_gt(a, b) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u8) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgtq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_gt(a, b) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u16) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { simd_gt(a, b) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u16) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgtq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { simd_gt(a, b) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u32) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { simd_gt(a, b) } -/// Compare unsigned greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u32) +#[doc = "Compare unsigned greater than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_gt(a, b) } -/// Floating-point compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f32) +#[doc = "Floating-point compare less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - simd_gt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_le(a, b) } -/// Floating-point compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtq_f32) +#[doc = "Floating-point compare less than or equal"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - simd_gt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_le(a, b) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s8) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_le(a, b) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s8) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vcge.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_le(a, b) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s16) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_le(a, b) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s16) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_le(a, b) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s32) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_le(a, b) } -/// Compare signed less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s32) +#[doc = "Compare signed less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmge) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_le(a, b) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u8) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_le(a, b) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u8) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_le(a, b) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u16) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_le(a, b) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u16) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_le(a, b) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u32) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_le(a, b) } -/// Compare unsigned less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u32) +#[doc = "Compare unsigned less than or equal"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhi))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_lt(a, b) -} - -/// Floating-point compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_le(a, b) } -/// Floating-point compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f32) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - simd_lt(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcls_s8(a: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v8i8" + )] + fn _vcls_s8(a: int8x8_t) -> int8x8_t; + } + _vcls_s8(a) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s8) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclsq_s8(a: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v16i8" + )] + fn _vclsq_s8(a: int8x16_t) -> int8x16_t; + } + _vclsq_s8(a) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s8) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcls_s16(a: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v4i16" + )] + fn _vcls_s16(a: int16x4_t) -> int16x4_t; + } + _vcls_s16(a) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s16) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclsq_s16(a: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v8i16" + )] + fn _vclsq_s16(a: int16x8_t) -> int16x8_t; + } + _vclsq_s16(a) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s16) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcls_s32(a: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.cls.v2i32" + )] + fn _vcls_s32(a: int32x2_t) -> int32x2_t; + } + _vcls_s32(a) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_s32) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.cls.v4i32" + )] + fn _vclsq_s32(a: int32x4_t) -> int32x4_t; + } + _vclsq_s32(a) } -/// Compare signed less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_s32) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcls_u8(a: uint8x8_t) -> int8x8_t { + vcls_s8(transmute(a)) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u8) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclsq_u8(a: uint8x16_t) -> int8x16_t { + vclsq_s8(transmute(a)) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u8) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcls_u16(a: uint16x4_t) -> int16x4_t { + vcls_s16(transmute(a)) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u16) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclsq_u16(a: uint16x8_t) -> int16x8_t { + vclsq_s16(transmute(a)) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u16) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcls_u32(a: uint32x2_t) -> int32x2_t { + vcls_s32(transmute(a)) } -/// Compare unsigned less than or equal -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_u32) +#[doc = "Count leading sign bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclsq_u32(a: uint32x4_t) -> int32x4_t { + vclsq_s32(transmute(a)) } -/// Compare unsigned less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_u32) +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_lt(a, b) } -/// Floating-point compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f32) +#[doc = "Floating-point compare less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcle_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_lt(a, b) } -/// Floating-point compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcleq_f32) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - simd_le(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_lt(a, b) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s8) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_lt(a, b) } -/// Compare signed greater than or equal -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s8) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgeq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_lt(a, b) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s16) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_lt(a, b) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s16) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800"))] -pub unsafe fn vcgeq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_lt(a, b) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_s32) +#[doc = "Compare signed less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmgt) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_lt(a, b) } -/// Compare signed greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_s32) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgeq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_lt(a, b) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u8) +#[doc = "Compare unsigned 
less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_lt(a, b) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u8) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgeq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_lt(a, b) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u16) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_ge(a, b) 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_lt(a, b) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u16) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgeq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_lt(a, b) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_u32) +#[doc = "Compare unsigned less than"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmhi) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_lt(a, b) } -/// Compare unsigned greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_u32) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cmhs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t { + vclz_s16_(a) } -/// Floating-point compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f32) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcge_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t { + vclzq_s16_(a) } -/// Floating-point compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgeq_f32) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcmge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - simd_ge(a, b) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t { + vclz_s32_(a) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s8) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcls_s8(a: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.cls.v8i8")] - fn vcls_s8_(a: int8x8_t) -> int8x8_t; - } -vcls_s8_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t { + vclzq_s32_(a) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s8) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclsq_s8(a: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.cls.v16i8")] - fn vclsq_s8_(a: int8x16_t) -> int8x16_t; - } -vclsq_s8_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t { + vclz_s8_(a) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s16) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcls_s16(a: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.cls.v4i16")] - fn vcls_s16_(a: int16x4_t) -> int16x4_t; - } -vcls_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t { + vclzq_s8_(a) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s16) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclsq_s16(a: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.cls.v8i16")] - fn vclsq_s16_(a: int16x8_t) -> int16x8_t; - } -vclsq_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t { + transmute(vclz_s16_(transmute(a))) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_s32) 
+#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcls_s32(a: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.cls.v2i32")] - fn vcls_s32_(a: int32x2_t) -> int32x2_t; - } -vcls_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { + transmute(vclzq_s16_(transmute(a))) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_s32) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.cls.v4i32")] - fn vclsq_s32_(a: int32x4_t) -> int32x4_t; - } -vclsq_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t { + transmute(vclz_s32_(transmute(a))) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u8) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcls_u8(a: uint8x8_t) -> int8x8_t { - vcls_s8(transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { + transmute(vclzq_s32_(transmute(a))) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u8) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclsq_u8(a: uint8x16_t) -> int8x16_t { - vclsq_s8(transmute(a)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t { + transmute(vclz_s8_(transmute(a))) } -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcls_u16(a: uint16x4_t) -> int16x4_t { - vcls_s16(transmute(a)) -} - -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclsq_u16(a: uint16x8_t) -> int16x8_t { - vclsq_s16(transmute(a)) -} - -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcls_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcls_u32(a: uint32x2_t) -> int32x2_t { - vcls_s32(transmute(a)) -} - -/// Count leading sign bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclsq_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(cls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclsq_u32(a: uint32x4_t) -> int32x4_t { - vclsq_s32(transmute(a)) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t { - vclz_s8_(a) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t { - vclzq_s8_(a) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t { - vclz_s16_(a) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t { - vclzq_s16_(a) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t { - vclz_s32_(a) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t { - vclzq_s32_(a) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t { - transmute(vclz_s8_(transmute(a))) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8) +#[doc = "Count leading zero bits"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] 
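// [Editor's aside: an illustrative usage sketch, not part of the generated
// file. vclz_s*/vclz_u* count leading zero bits per lane; a 16-bit lane
// holding 1 has 15 of them. Assumes an aarch64 target; the demo function
// name is hypothetical.]
#[cfg(target_arch = "aarch64")]
fn demo_vclz_s16() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vdup_n_s16(1); // 0b0000_0000_0000_0001
        let zeros: int16x4_t = vclz_s16(v);
        assert_eq!(vget_lane_s16::<0>(zeros), 15);
    }
}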
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(clz) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { transmute(vclzq_s8_(transmute(a))) } -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t { - transmute(vclz_s16_(transmute(a))) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { - transmute(vclzq_s16_(transmute(a))) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t { - transmute(vclz_s32_(transmute(a))) -} - -/// Count leading zero bits -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(clz))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { - transmute(vclzq_s32_(transmute(a))) -} - -/// Floating-point absolute compare greater than -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagt_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32")] - fn vcagt_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - } -vcagt_f32_(a, b) -} - -/// Floating-point absolute compare greater than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcagtq_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facgt.v4i32.v4f32")] - fn vcagtq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - } -vcagtq_f32_(a, b) -} - -/// Floating-point absolute compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcage_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facge.v2i32.v2f32")] - fn vcage_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t; - } -vcage_f32_(a, b) -} - -/// Floating-point absolute compare greater than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcageq_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), assert_instr(facge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.facge.v4i32.v4f32")] - fn vcageq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t; - } -vcageq_f32_(a, b) -} - -/// Floating-point absolute compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcalt_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - vcagt_f32(b, a) -} - -/// Floating-point absolute compare less than -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaltq_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facgt))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - vcagtq_f32(b, a) -} - -/// Floating-point absolute compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcale_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facge))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcale_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { - vcage_f32(b, a) -} - -/// Floating-point absolute compare less than or equal -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaleq_f32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(facge))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { - vcageq_f32(b, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vcreate_f32(a: u64) -> float32x2_t { + transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_s8(a: u64) -> int8x8_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_s16(a: u64) -> int16x4_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_s32(a: u64) -> int32x2_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_s64(a: u64) -> int64x1_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t { transmute(a) } -/// Insert vector element from another vector 
element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_u32(a: u64) -> uint32x2_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_u64(a: u64) -> uint64x1_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_p8(a: u64) -> poly8x8_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t { transmute(a) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vcreate_f32(a: u64) -> float32x2_t { - transmute(a) -} - -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(scvtf))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t { simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_s32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(scvtf))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(scvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t { 
simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ucvtf))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t { simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ucvtf))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ucvtf) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t { simd_cast(a) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -3083,36 +4332,20 @@ pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32")] - fn vcvt_n_f32_s32_(a: int32x2_t, n: i32) -> float32x2_t; - } -vcvt_n_f32_s32_(a, N) -} - -/// Fixed-point convert to floating-point -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32) -#[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(scvtf, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32")] - fn vcvt_n_f32_s32_(a: int32x2_t, n: i32) -> float32x2_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t; } -vcvt_n_f32_s32_(a, N) + _vcvt_n_f32_s32(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -3121,74 +4354,86 @@ vcvt_n_f32_s32_(a, N) #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32")] - fn vcvtq_n_f32_s32_(a: int32x4_t, n: i32) -> float32x4_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t; } -vcvtq_n_f32_s32_(a, N) + _vcvtq_n_f32_s32(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(scvtf, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { +pub unsafe fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32")] - fn vcvtq_n_f32_s32_(a: int32x4_t, n: i32) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t; } -vcvtq_n_f32_s32_(a, N) + _vcvt_n_f32_s32(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"] +#[doc = 
"## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] #[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32")] - fn vcvt_n_f32_u32_(a: uint32x2_t, n: i32) -> float32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t; } -vcvt_n_f32_u32_(a, N) + _vcvtq_n_f32_s32(a, N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] #[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32")] - fn vcvt_n_f32_u32_(a: uint32x2_t, n: i32) -> float32x2_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_u32(a: int32x2_t, n: i32) -> float32x2_t; } -vcvt_n_f32_u32_(a, N) + _vcvt_n_f32_u32(a.as_signed(), N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -3197,74 +4442,86 @@ vcvt_n_f32_u32_(a, N) #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32")] - fn vcvtq_n_f32_u32_(a: uint32x4_t, n: i32) -> float32x4_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_u32(a: int32x4_t, n: i32) -> float32x4_t; } -vcvtq_n_f32_u32_(a, N) + _vcvtq_n_f32_u32(a.as_signed(), N) } -/// Fixed-point convert to floating-point -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(ucvtf, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { +pub unsafe fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32")] - fn vcvtq_n_f32_u32_(a: uint32x4_t, n: i32) -> float32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32" + )] + fn _vcvt_n_f32_u32(a: int32x2_t, n: i32) -> float32x2_t; } -vcvtq_n_f32_u32_(a, N) + _vcvt_n_f32_u32(a.as_signed(), N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32) +#[doc = "Fixed-point convert to floating-point"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] #[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32")] - fn vcvt_n_s32_f32_(a: float32x2_t, n: i32) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32" + )] + fn _vcvtq_n_f32_u32(a: int32x4_t, n: i32) -> float32x4_t; } -vcvt_n_s32_f32_(a, N) + _vcvtq_n_f32_u32(a.as_signed(), N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] #[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { 
static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32")] - fn vcvt_n_s32_f32_(a: float32x2_t, n: i32) -> int32x2_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32" + )] + fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t; } -vcvt_n_s32_f32_(a, N) + _vcvt_n_s32_f32(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -3273,74 +4530,86 @@ vcvt_n_s32_f32_(a, N) #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32")] - fn vcvtq_n_s32_f32_(a: float32x4_t, n: i32) -> int32x4_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32" + )] + fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t; } -vcvtq_n_s32_f32_(a, N) + _vcvtq_n_s32_f32(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(fcvtzs, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { +pub unsafe fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32")] - fn vcvtq_n_s32_f32_(a: float32x4_t, n: i32) -> int32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32" + )] + fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t; } -vcvtq_n_s32_f32_(a, N) + _vcvt_n_s32_f32(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] #[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32")] - fn vcvt_n_u32_f32_(a: float32x2_t, n: i32) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32" + )] + fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t; } -vcvt_n_u32_f32_(a, N) + _vcvtq_n_s32_f32(a, N) } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] #[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32")] - fn vcvt_n_u32_f32_(a: float32x2_t, n: i32) -> uint32x2_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32" + )] + fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> int32x2_t; } -vcvt_n_u32_f32_(a, N) + _vcvt_n_u32_f32(a, N).as_unsigned() } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32) +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -3349,1087 +4618,2110 @@ vcvt_n_u32_f32_(a, N) #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] pub unsafe fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32")] - fn vcvtq_n_u32_f32_(a: float32x4_t, n: i32) -> uint32x4_t; + #[cfg_attr( + target_arch = "arm", + link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32" + )] + fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> int32x4_t; } -vcvtq_n_u32_f32_(a, N) + _vcvtq_n_u32_f32(a, N).as_unsigned() } -/// Floating-point convert to fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32) +#[doc = "Floating-point 
convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] +#[target_feature(enable = "neon")] #[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32" + )] + fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> int32x2_t; + } + _vcvt_n_u32_f32(a, N).as_unsigned() +} + +#[doc = "Floating-point convert to fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] #[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] #[cfg_attr(test, assert_instr(fcvtzu, N = 2))] #[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub unsafe fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32")] - fn vcvtq_n_u32_f32_(a: float32x4_t, n: i32) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32" + )] + fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> int32x4_t; } -vcvtq_n_u32_f32_(a, N) + _vcvtq_n_u32_f32(a, N).as_unsigned() } -/// Floating-point convert to signed fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32) +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcvtzs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptosi.sat.v2i32.v2f32")] - fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.fptosi.sat.v2i32.v2f32" + )] + fn _vcvt_s32_f32(a: float32x2_t) -> int32x2_t; } -vcvt_s32_f32_(a) + _vcvt_s32_f32(a) } -/// Floating-point convert to signed fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s32_f32) +#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcvtzs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptosi.sat.v4i32.v4f32")] - fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fptosi.sat.v4i32.v4f32" + )] + fn _vcvtq_s32_f32(a: float32x4_t) -> int32x4_t; } -vcvtq_s32_f32_(a) + _vcvtq_s32_f32(a) } -/// Floating-point convert to unsigned fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32) +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcvtzu))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptoui.sat.v2i32.v2f32")] - fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", 
target_arch = "arm64ec"), + link_name = "llvm.fptoui.sat.v2i32.v2f32" + )] + fn _vcvt_u32_f32(a: float32x2_t) -> int32x2_t; } -vcvt_u32_f32_(a) + _vcvt_u32_f32(a).as_unsigned() } -/// Floating-point convert to unsigned fixed-point, rounding toward zero -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u32_f32) +#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fcvtzu))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fcvtzu) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fptoui.sat.v4i32.v4f32")] - fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.fptoui.sat.v4i32.v4f32" + )] + fn _vcvtq_u32_f32(a: float32x4_t) -> int32x4_t; + } + _vcvtq_u32_f32(a).as_unsigned() +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vdot_s32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 
0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdotq_lane_s32( + a: int32x4_t, + b: int8x16_t, + c: int8x8_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = transmute(c); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_s32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdot_lane_u32( + a: uint32x2_t, + b: uint8x8_t, + c: uint8x8_t, +) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + vdot_u32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdotq_lane_u32( + a: uint32x4_t, + b: uint8x16_t, + c: uint8x8_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_u32(a, b, transmute(c)) +} + +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8" + )] + fn _vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t; + } + _vdot_s32(a, b, c) +} + +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8" + )] + fn _vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; } -vcvtq_u32_f32_(a) + _vdotq_s32(a, b, c) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8) +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.udot.v2i32.v8i8" + )] + fn _vdot_u32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t; + } + _vdot_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned() +} + +#[doc = "Dot product arithmetic (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot) +)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.udot.v4i32.v16i8"
+        )]
+        fn _vdotq_u32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    _vdotq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup, N = 1)
+)]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
 
-/// Set all vector lanes to the same value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup, N = 1)
+)]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
 
-/// Set all vector lanes to the same value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup, N = 1)
+)]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
 
-/// Set all vector lanes to the same value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup, N = 1)
+)]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 
-/// Set all vector lanes to the same value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup, N = 1)
+)]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 
-/// Set all vector lanes to the same value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup, N = 1)
+)]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 
-/// Set all vector lanes to the same value
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"),
assert_instr("vdup.8", N = 8))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { - static_assert_uimm_bits!(N, 4); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(N, 3); +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 2); simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] 
#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32]) + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { - static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] pub unsafe fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_s32(a: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(N, 3); - 
simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 4); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { + 
static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as 
u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 4); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N == 0); + a } -/// Set all vector lanes to the same value -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 0) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N == 0); + a } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { static_assert_uimm_bits!(N, 2); simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_s32(a: int32x4_t) -> int32x2_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_u32(a: uint32x4_t) -> uint32x2_t { static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 1); +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { 
- static_assert_uimm_bits!(N, 4); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 2); +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 3); simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(N, 3); - 
simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 8))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { - static_assert_uimm_bits!(N, 4); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 4))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(N, 3); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 4) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { - static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 3); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 4); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { - static_assert!(N == 0); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { + static_assert_uimm_bits!(N, 4); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 4); + simd_shuffle!( + a, + a, + [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { - static_assert!(N == 0); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_f32(a: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 8) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_laneq_f32(a: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + simd_shuffle!( + a, + a, + [ + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, + N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 + ] + ) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 1) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_f32(a: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(N, 2); - simd_shuffle!(a, a, [N as u32, N as u32]) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { + static_assert_uimm_bits!(N, 1); + transmute::(simd_extract!(a, N as u32)) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(dup, N = 1))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, N = 1) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdupq_lane_f32(a: float32x2_t) -> float32x4_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { static_assert_uimm_bits!(N, 1); - simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) + transmute::(simd_extract!(a, N as u32)) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
+ assert_instr(dup, N = 0) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_s64(a: int64x1_t) -> int64x1_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_s64(a: int64x1_t) -> int64x2_t { static_assert!(N == 0); - a + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, N = 0))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 0) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_lane_u64(a: uint64x1_t) -> uint64x2_t { static_assert!(N == 0); - a + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_s64(a: int64x2_t) -> int64x2_t { static_assert_uimm_bits!(N, 1); - 
transmute::(simd_extract!(a, N as u32)) + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Set all vector lanes to the same value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64) +#[doc = "Set all vector lanes to the same value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(dup, N = 1) +)] #[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vdupq_laneq_u64(a: uint64x2_t) -> uint64x2_t { static_assert_uimm_bits!(N, 1); - transmute::(simd_extract!(a, N as u32)) + simd_shuffle!(a, a, [N as u32, N as u32]) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 7))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_uimm_bits!(N, 3); - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors 
-/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 15))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(N, 4); - match N & 0b1111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), - 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), - 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), - 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), - 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), - 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), - 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch 
= "arm64ec")), assert_instr(ext, N = 3))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(N, 2); - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 7))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(N, 3); - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr("vext.8", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(N, 1); - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 3))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(N, 2); - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 7))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_uimm_bits!(N, 3); - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 15))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(N, 4); - match N & 0b1111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), - 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), - 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), - 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), - 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), - 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 
- 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 3))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 2); - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), - _ => unreachable_unchecked(), - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_xor(a, b) } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16) +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 7))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 3); - match N & 0b111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), - 4 => simd_shuffle!(a, b, [4, 
5, 6, 7, 8, 9, 10, 11]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_xor(a, b) +} + +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_xor(a, b) +} + +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_xor(a, b) +} + +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_xor(a, b) +} + +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_xor(a, b) +} + +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_xor(a, b) +} + +#[doc = "Vector bitwise exclusive or (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(eor) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_xor(a, b) +} + +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(N, 1); + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), @@ -4438,40 +6730,58 @@ pub unsafe fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 3))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 2); - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 1); + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 7))] -#[rustc_legacy_const_generics(2)] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), @@ -4486,74 +6796,100 @@ pub unsafe fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 15))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(N, 4); - match N & 0b1111 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), - 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), - 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), - 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), - 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), - 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), - 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), - 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), - 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), - 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), - 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), - 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), - 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] 
+#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 3); + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 3))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_uimm_bits!(N, 2); - match N & 0b11 { - 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), - 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), - 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 7))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), @@ -4568,78 +6904,132 @@ pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_ } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 1); - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(N, 3); + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 1); - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 7) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(N, 3); + match N & 0b111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vext_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(N, 1); - match N & 0b1 { - 0 => simd_shuffle!(a, b, [0, 1]), - 1 => simd_shuffle!(a, b, [1, 2]), +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => 
simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } } -/// Extract vector from pair of vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ext, N = 3))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), @@ -4650,42284 +7040,42844 @@ pub unsafe fn vextq_f32(a: float32x4_t, b: float32x4_t) -> float32 } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8) +#[doc = 
"Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 3) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vext_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(N, 2); + match N & 0b11 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - simd_add(a, simd_mul(b, c)) -} - -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 1); + match N & 0b1 { + 0 => simd_shuffle!(a, b, [0, 1]), + 1 => simd_shuffle!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 4); + match N & 0b1111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!( + a, + b, + [1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ), + 2 => simd_shuffle!( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + ), + 3 => simd_shuffle!( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] + ), + 4 => simd_shuffle!( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ), + 5 => simd_shuffle!( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + ), + 6 => simd_shuffle!( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + ), + 7 => simd_shuffle!( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + ), + 8 => simd_shuffle!( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + ), + 9 => simd_shuffle!( + a, + b, + [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + ), + 10 => simd_shuffle!( + a, + b, + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ), + 11 => simd_shuffle!( + a, + b, + [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] + ), + 12 => simd_shuffle!( + a, + b, + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + ), + 13 => simd_shuffle!( + a, + b, + [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] + ), + 14 => simd_shuffle!( + a, + b, + [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + ), + 15 => simd_shuffle!( + a, + b, + [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + ), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 4); + match N & 0b1111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ), + 2 => simd_shuffle!( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + ), + 3 => simd_shuffle!( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] + ), + 4 => simd_shuffle!( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ), + 5 => simd_shuffle!( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20] + ), + 6 => simd_shuffle!( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + ), + 7 => simd_shuffle!( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + ), + 8 => simd_shuffle!( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + ), + 9 => simd_shuffle!( + a, + b, + [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + ), + 10 => simd_shuffle!( + a, + b, + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ), + 11 => simd_shuffle!( + a, + b, + [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] + ), + 12 => simd_shuffle!( + a, + b, + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + ), + 13 => simd_shuffle!( + a, + b, + [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] + ), + 14 => simd_shuffle!( + a, + b, + [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + ), + 15 => simd_shuffle!( + a, + b, + [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + ), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16) +#[doc = "Extract vector from pair of vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ext, N = 15) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vextq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(N, 4); + match N & 0b1111 { + 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle!( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ), + 2 => simd_shuffle!( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + ), + 3 => simd_shuffle!( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] + ), + 4 => simd_shuffle!( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + ), + 5 => simd_shuffle!( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + ), + 6 => simd_shuffle!( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + ), + 7 => simd_shuffle!( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + ), + 8 => simd_shuffle!( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + ), + 9 => simd_shuffle!( + a, + b, + [9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] + ), + 10 => simd_shuffle!( + a, + b, + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ), + 11 => simd_shuffle!( + a, + b, + [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26] + ), + 12 => simd_shuffle!( + a, + b, + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27] + ), + 13 => simd_shuffle!( + a, + b, + [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28] + ), + 14 => simd_shuffle!( + a, + b, + [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + ), + 15 => simd_shuffle!( + a, + b, + [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] + ), + _ => unreachable_unchecked(), + } } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32) +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")] + fn _vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } + _vfma_f32(b, c, a) } -/// Multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32) +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")] + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")] + fn _vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } + _vfmaq_f32(b, c, a) } -/// Floating-point multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32) +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfma_f32(a, b, vdup_n_f32_vfp4(c)) } -/// Floating-point multiply-add to accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32) +#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - simd_add(a, simd_mul(b, c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmla) +)] 
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vfmaq_f32(a, b, vdupq_n_f32_vfp4(c))
 }
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmla_s16(a, b, vdup_n_s16(c))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmls)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    let b: float32x2_t = simd_neg(b);
+    vfma_f32(a, b, c)
 }
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlaq_s16(a, b, vdupq_n_s16(c))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmls)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    let b: float32x4_t = simd_neg(b);
+    vfmaq_f32(a, b, c)
 }
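Reviewer note: a minimal usage sketch of the fused multiply-add/subtract intrinsics added above (illustrative only, not part of the generated file; the demo function name is hypothetical and it assumes an AArch64 target, where NEON is baseline). `vfmaq_f32(a, b, c)` computes `a + b * c` with a single rounding step, and `vfmsq_f32` negates `b` before the same fused operation:

```
// Hypothetical demo, not generated code: exercises vfmaq_f32/vfmsq_f32
// through the public core::arch::aarch64 API.
#[cfg(target_arch = "aarch64")]
fn fma_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(1.0); // accumulator
        let b = vdupq_n_f32(2.0);
        let c = vdupq_n_f32(3.0);
        let r = vfmaq_f32(a, b, c); // a + b * c, fused (one rounding)
        assert_eq!(vgetq_lane_f32::<0>(r), 7.0);
        let s = vfmsq_f32(a, b, c); // a - b * c
        assert_eq!(vgetq_lane_f32::<0>(s), -5.0);
    }
}
```

Note the operand order in the wrappers: the `llvm.fma` callee takes `(b, c, a)`, so the generated code reorders arguments to keep the accumulator first in the public signature.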
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)
+#[doc = "Floating-point fused multiply-subtract from accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmla_s32(a, b, vdup_n_s32(c))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmls)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vfms_f32(a, b, vdup_n_f32_vfp4(c))
 }
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)
+#[doc = "Floating-point fused multiply-subtract from accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlaq_s32(a, b, vdupq_n_s32(c))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmls)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vfmsq_f32(a, b, vdupq_n_f32_vfp4(c))
 }
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { - vmla_u16(a, b, vdup_n_u16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] + fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vhadd_s8(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { - vmlaq_u16(a, b, vdupq_n_u16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] + fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vhaddq_s8(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { - vmla_u32(a, b, vdup_n_u32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] + fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vhadd_s16(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { - vmlaq_u32(a, b, vdupq_n_u32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] + fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vhaddq_s16(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch 
= "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vmla_f32(a, b, vdup_n_f32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] + fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vhadd_s32(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vmlaq_f32(a, b, vdupq_n_f32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] + fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vhaddq_s32(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800"))] -pub unsafe fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmla_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] + fn _vhadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vhadd_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmla_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] + fn _vhaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vhaddq_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] + fn _vhadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vhadd_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlaq_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] + fn _vhaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vhaddq_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32) +#[doc = "Halving add"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] + fn _vhadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vhadd_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32) +#[doc = "Halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] + fn _vhaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vhaddq_u32(a.as_signed(), 
b.as_signed()).as_unsigned() } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32) +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlaq_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vhsub_s16(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32) +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vhsubq_s16(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16) +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmla_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vhsub_s32(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16) +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmla_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmla_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vhsubq_s32(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16) +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlaq_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.shsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vhsub_s8(a, b) } -/// Vector multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16) +#[doc = "Signed halving subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlaq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlaq_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, 
LANE as u32]))
-}
-
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(shsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
+        fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vhsubq_s8(a, b)
 }
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)
+#[doc = "Unsigned halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
+        fn _vhsub_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vhsub_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
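Reviewer note: a scalar model of the halving arithmetic wired up above (illustrative only, not part of the generated file; the demo function name is hypothetical and it assumes an AArch64 target). Each lane behaves as if widened first, so `(a + b) >> 1` and `(a - b) >> 1` never overflow the element type:

```
// Hypothetical demo, not generated code: halving add/subtract keep the
// result in range even when the plain sum would overflow a u8 lane.
#[cfg(target_arch = "aarch64")]
fn halving_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_u8(200);
        let b = vdup_n_u8(100);
        let avg = vhadd_u8(a, b); // (200 + 100) >> 1 = 150; 300 itself would not fit
        assert_eq!(vget_lane_u8::<0>(avg), 150);
        let half_diff = vhsub_u8(a, b); // (200 - 100) >> 1 = 50
        assert_eq!(vget_lane_u8::<0>(half_diff), 50);
    }
}
```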
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)
+#[doc = "Unsigned halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
+        fn _vhsubq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vhsubq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }

-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)
+#[doc = "Unsigned halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mla, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
+        fn _vhsub_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vhsub_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }

-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)
+#[doc = "Unsigned halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmla_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
+        fn _vhsubq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vhsubq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }

-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)
+#[doc = "Unsigned halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmla_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
+        fn _vhsub_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vhsub_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }

-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)
+#[doc = "Unsigned halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlaq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uhsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhsub.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
+        fn _vhsubq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vhsubq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Vector multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0f32")]
+        fn _vld1_f32_x2(a: *const f32) -> float32x2x2_t;
+    }
+    _vld1_f32_x2(a)
 }

-/// Signed multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    simd_add(a, vmull_s8(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0f32")]
+        fn _vld1_f32_x3(a: *const f32) -> float32x2x3_t;
+    }
+    _vld1_f32_x3(a)
 }

-/// Signed multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    simd_add(a, vmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0f32")]
+        fn _vld1_f32_x4(a: *const f32) -> float32x2x4_t;
+    }
+    _vld1_f32_x4(a)
 }

-/// Signed multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    simd_add(a, vmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0f32")]
+        fn _vld1q_f32_x2(a: *const f32) -> float32x4x2_t;
+    }
+    _vld1q_f32_x2(a)
 }

-/// Unsigned multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    simd_add(a, vmull_u8(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0f32")]
+        fn _vld1q_f32_x3(a: *const f32) -> float32x4x3_t;
+    }
+    _vld1q_f32_x3(a)
 }

-/// Unsigned multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    simd_add(a, vmull_u16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0f32")]
+        fn _vld1q_f32_x4(a: *const f32) -> float32x4x4_t;
+    }
+    _vld1q_f32_x4(a)
 }
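The `vld1*_x2`/`_x3`/`_x4` additions load two, three, or four consecutive vectors through a single pointer. A usage sketch for `vld1_f32_x2` on AArch64 (hypothetical helper; the caller must guarantee the pointer covers all four `f32` values):

```rust
// Hypothetical AArch64 helper: sum four consecutive f32 values with one
// vld1_f32_x2 load (two float32x2_t vectors read back to back).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn sum_four(data: *const f32) -> f32 {
    use core::arch::aarch64::{vadd_f32, vaddv_f32, vld1_f32_x2};
    let pair = vld1_f32_x2(data); // reads data[0..4]
    vaddv_f32(vadd_f32(pair.0, pair.1))
}
```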
-/// Unsigned multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    simd_add(a, vmull_u32(b, c))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
+    transmute(vld1_s64_x2(transmute(a)))
 }

-/// Vector widening multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlal_s16(a, b, vdup_n_s16(c))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
+    transmute(vld1_s64_x3(transmute(a)))
 }

-/// Vector widening multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlal_s32(a, b, vdup_n_s32(c))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
+    transmute(vld1_s64_x4(transmute(a)))
 }

-/// Vector widening multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlal_u16(a, b, vdup_n_u16(c))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
+    transmute(vld1q_s64_x2(transmute(a)))
 }

-/// Vector widening multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlal_u32(a, b, vdup_n_u32(c))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
+    transmute(vld1q_s64_x3(transmute(a)))
 }

-/// Vector widening multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
+    transmute(vld1q_s64_x4(transmute(a)))
 }
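Unlike the other loads, the `p64` variants above are compiled with `target_feature(enable = "neon,aes")` (and `v8` on 32-bit Arm), so on targets where `aes` is not a baseline feature a runtime check is needed before calling them. A hedged sketch using std's feature detection (hypothetical wrapper, not part of this patch):

```rust
// Hypothetical wrapper: vld1_p64_x2 is gated on `neon,aes`, so check the
// `aes` feature at runtime before calling it on AArch64.
#[cfg(target_arch = "aarch64")]
fn load_p64_pair(src: &[u64; 2]) -> Option<core::arch::aarch64::poly64x1x2_t> {
    if std::arch::is_aarch64_feature_detected!("aes") {
        // Safety: the feature is present and `src` supplies both lanes.
        Some(unsafe { core::arch::aarch64::vld1_p64_x2(src.as_ptr()) })
    } else {
        None
    }
}
```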
"llvm.arm.neon.vld1x2.v8i8.p0i8")] + fn _vld1_s8_x2(a: *const i8) -> int8x8x2_t; + } + _vld1_s8_x2(a) } -/// Vector widening multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0i8")] + fn _vld1_s8_x3(a: *const i8) -> int8x8x3_t; + } + _vld1_s8_x3(a) } -/// Vector widening multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlal, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlal_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vld1_s8_x4(a: *const i8) -> int8x8x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0i8")] + fn _vld1_s8_x4(a: *const i8) -> int8x8x4_t; + } + _vld1_s8_x4(a) } -/// Vector widening multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlal_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlal_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0i8")] + fn _vld1q_s8_x2(a: *const i8) -> int8x16x2_t; + } + _vld1q_s8_x2(a) } -/// Vector widening multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlal_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlal_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0i8")] + fn _vld1q_s8_x3(a: *const i8) -> int8x16x3_t; + } + _vld1q_s8_x3(a) } -/// Vector widening multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlal_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0i8")] + fn _vld1q_s8_x4(a: *const i8) -> int8x16x4_t; + } + _vld1q_s8_x4(a) } -/// Vector widening multiply accumulate with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
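As far as the linked Arm documentation describes them, the `ld1x2`/`ld1x3`/`ld1x4` forms behave like back-to-back `vld1` loads from consecutive addresses. The sketch below expresses that assumed equivalence for `vld1q_s8_x2` with two plain loads (illustrative only, not the generated implementation):

```rust
// Illustrative assumption: one vld1q_s8_x2 load of 32 bytes matches two
// plain vld1q_s8 loads at consecutive addresses.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_two_q_vectors(p: *const i8) -> core::arch::aarch64::int8x16x2_t {
    use core::arch::aarch64::{int8x16x2_t, vld1q_s8};
    int8x16x2_t(vld1q_s8(p), vld1q_s8(p.add(16)))
}
```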
-/// Vector widening multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlal, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0i16")]
+        fn _vld1_s16_x2(a: *const i16) -> int16x4x2_t;
+    }
+    _vld1_s16_x2(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0i16")]
+        fn _vld1_s16_x3(a: *const i16) -> int16x4x3_t;
+    }
+    _vld1_s16_x3(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0i16")]
+        fn _vld1_s16_x4(a: *const i16) -> int16x4x4_t;
+    }
+    _vld1_s16_x4(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0i16")]
+        fn _vld1q_s16_x2(a: *const i16) -> int16x8x2_t;
+    }
+    _vld1q_s16_x2(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0i16")]
+        fn _vld1q_s16_x3(a: *const i16) -> int16x8x3_t;
+    }
+    _vld1q_s16_x3(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0i16")]
+        fn _vld1q_s16_x4(a: *const i16) -> int16x8x4_t;
+    }
+    _vld1q_s16_x4(a)
 }
-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0i32")]
+        fn _vld1_s32_x2(a: *const i32) -> int32x2x2_t;
+    }
+    _vld1_s32_x2(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0i32")]
+        fn _vld1_s32_x3(a: *const i32) -> int32x2x3_t;
+    }
+    _vld1_s32_x3(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0i32")]
+        fn _vld1_s32_x4(a: *const i32) -> int32x2x4_t;
+    }
+    _vld1_s32_x4(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0i32")]
+        fn _vld1q_s32_x2(a: *const i32) -> int32x4x2_t;
+    }
+    _vld1q_s32_x2(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0i32")]
+        fn _vld1q_s32_x3(a: *const i32) -> int32x4x3_t;
+    }
+    _vld1q_s32_x3(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0i32")]
+        fn _vld1q_s32_x4(a: *const i32) -> int32x4x4_t;
+    }
+    _vld1q_s32_x4(a)
 }

-/// Multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0i64")]
+        fn _vld1_s64_x2(a: *const i64) -> int64x1x2_t;
+    }
+    _vld1_s64_x2(a)
 }

-/// Floating-point multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0i64")]
+        fn _vld1_s64_x3(a: *const i64) -> int64x1x3_t;
+    }
+    _vld1_s64_x3(a)
 }

-/// Floating-point multiply-subtract from accumulator
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0i64")] + fn _vld1_s64_x4(a: *const i64) -> int64x1x4_t; + } + _vld1_s64_x4(a) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { - vmls_s16(a, b, vdup_n_s16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0i64")] + fn _vld1q_s64_x2(a: *const i64) -> int64x2x2_t; + } + _vld1q_s64_x2(a) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { - vmlsq_s16(a, b, vdupq_n_s16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0i64")] + fn _vld1q_s64_x3(a: *const i64) -> int64x2x3_t; + } + _vld1q_s64_x3(a) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { - vmls_s32(a, b, vdup_n_s32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0i64")] + fn _vld1q_s64_x4(a: *const i64) -> int64x2x4_t; + } + _vld1q_s64_x4(a) }
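// Usage sketch (editorial aside, not part of the generated file; helper name
// is illustrative): the `_x2`/`_x3`/`_x4` suffixes load two, three, or four
// consecutive NEON registers from a single pointer, so `vld1q_s64_x4` reads
// eight contiguous i64 values. Assumes an AArch64 target, where NEON is
// mandatory.
#[cfg(target_arch = "aarch64")]
fn sum_eight_i64(data: &[i64; 8]) -> i64 {
    use core::arch::aarch64::*;
    // Safety: `data` supplies exactly the eight contiguous elements read,
    // and the `neon` feature is always available on AArch64.
    unsafe {
        let v: int64x2x4_t = vld1q_s64_x4(data.as_ptr());
        vaddvq_s64(v.0) + vaddvq_s64(v.1) + vaddvq_s64(v.2) + vaddvq_s64(v.3)
    }
}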
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { - vmlsq_s32(a, b, vdupq_n_s32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { - vmls_u16(a, b, vdup_n_u16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { - vmlsq_u16(a, b, vdupq_n_u16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { - vmls_u32(a, b, vdup_n_u32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { - vmlsq_u32(a, b, vdupq_n_u32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vmls_f32(a, b, vdup_n_f32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vmlsq_f32(a, b, vdupq_n_f32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmls_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmls_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x2(a: 
*const u16) -> uint16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlsq_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + transmute(vld1_s32_x2(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlsq_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + transmute(vld1_s32_x3(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + transmute(vld1_s32_x4(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - vmls_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + transmute(vld1q_s32_x2(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - vmls_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + transmute(vld1q_s32_x3(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + transmute(vld1q_s32_x4(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - vmlsq_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + transmute(vld1_s64_x2(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlsq_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + transmute(vld1q_s64_x2(transmute(a))) }
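// Editorial sketch of the lane pattern in the `vmls_lane_*`/`vmls_laneq_*`
// wrappers removed around here: the const LANE parameter picks one element of
// `c`, `simd_shuffle!` broadcasts it to every lane, and the multiply-subtract
// then runs lane-wise, i.e. r[i] = a[i] - b[i] * c[LANE] (wrapping for the
// integer variants). Assumes an AArch64 target; helper name is illustrative.
#[cfg(target_arch = "aarch64")]
fn mls_by_lane_1(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] {
    use core::arch::aarch64::*;
    // Safety: NEON is always available on AArch64, and LANE = 1 is in bounds.
    unsafe {
        let r = vmls_lane_u32::<1>(
            vld1_u32(a.as_ptr()),
            vld1_u32(b.as_ptr()),
            vld1_u32(c.as_ptr()),
        );
        [vget_lane_u32::<0>(r), vget_lane_u32::<1>(r)]
    }
}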
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mls, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) }
-/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmls_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - vmlsq_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) } -/// Vector multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsq_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) } -/// Signed 
multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - simd_sub(a, vmull_s8(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) }
-/// Signed multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - simd_sub(a, vmull_s16(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) }
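// Editorial sketch of the widening multiply-subtract computed by the
// `vmlsl_*` wrappers removed in this hunk: each product is formed at twice
// the input width before the subtraction, so the i16 products below cannot
// wrap before the final i32 subtract. Assumes an AArch64 target; helper name
// is illustrative.
#[cfg(target_arch = "aarch64")]
fn widening_mls(acc: [i32; 4], b: [i16; 4], c: [i16; 4]) -> [i32; 4] {
    use core::arch::aarch64::*;
    // Safety: NEON is always available on AArch64; the arrays supply every
    // element read and written.
    unsafe {
        // r[i] = acc[i] - (b[i] as i32) * (c[i] as i32)
        let r = vmlsl_s16(
            vld1q_s32(acc.as_ptr()),
            vld1_s16(b.as_ptr()),
            vld1_s16(c.as_ptr()),
        );
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), r);
        out
    }
}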
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - simd_sub(a, vmull_s32(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) } -/// Unsigned multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - simd_sub(a, vmull_u8(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) } -/// Unsigned multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - simd_sub(a, vmull_u16(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] 
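// A minimal usage sketch for the vld1_p16_x2 wrapper just defined, assuming
// an AArch64 target with NEON; the function and buffer names are hypothetical.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_two_poly16_vectors(data: &[u16; 8]) -> core::arch::aarch64::poly16x4x2_t {
    // SAFETY: `data` is valid for reads of all 8 consecutive p16 (u16) elements.
    unsafe { core::arch::aarch64::vld1_p16_x2(data.as_ptr()) }
}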
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) } -/// Unsigned multiply-subtract long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - simd_sub(a, vmull_u32(b, c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { - vmlsl_s16(a, b, vdup_n_s16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32) +#[doc = "Load multiple single-element structures to one, two, three, or four 
registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { - vmlsl_s32(a, b, vdup_n_s32(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16) +#[doc = "Load multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { - vmlsl_u16(a, b, vdup_n_u16(c)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", 
since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { - vmlsl_u32(a, b, vdup_n_u32(c)) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0i8")] + fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t; + } + _vld2_dup_f32(a as *const i8, 4) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0i8")] + fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t; + } + _vld2q_dup_f32(a as *const i8, 4) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlsl_s16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] 
+#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0i8")] + fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t; + } + _vld2_dup_s8(a as *const i8, 1) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0i8")] + fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t; + } + _vld2q_dup_s8(a as *const i8, 1) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0i8")] + fn _vld2_dup_s16(ptr: *const i8, size: 
i32) -> int16x4x2_t; + } + _vld2_dup_s16(a as *const i8, 2) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0i8")] + fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t; + } + _vld2q_dup_s16(a as *const i8, 2) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmlsl_u16(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0i8")] + fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t; + } + _vld2_dup_s32(a as *const i8, 4) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32) +#[doc = "Load single 2-element structure and 
replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0i8")] + fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t; + } + _vld2q_dup_s32(a as *const i8, 4) } -/// Vector widening multiply subtract with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umlsl, LANE = 1))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2f32.p0f32" + )] + fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t; + } + _vld2_dup_f32(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vneg.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(neg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4f32.p0f32" + )] + fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t; + } + _vld2q_dup_f32(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(neg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8i8.p0i8" + )] + fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t; + } + _vld2_dup_s8(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(neg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v16i8.p0i8" + )] + fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t; + } + _vld2q_dup_s8(a as _) } -/// Negate -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(neg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4i16.p0i16" + )] + fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t; + } + _vld2_dup_s16(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(neg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v8i16.p0i16" + )] + fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t; + } + _vld2q_dup_s16(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(neg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub 
unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v2i32.p0i32" + )] + fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t; + } + _vld2_dup_s32(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t { - simd_neg(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v4i32.p0i32" + )] + fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t; + } + _vld2q_dup_s32(a as _) } -/// Negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t { - simd_neg(a) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) } -/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8) +#[doc = "Load single 2-element 
structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v8i8")] - fn vqneg_s8_(a: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0i8")] + fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t; } -vqneg_s8_(a) + _vld2_dup_s64(a as *const i8, 8) } -/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld2r))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v16i8")] - fn vqnegq_s8_(a: int8x16_t) -> int8x16_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2r.v1i64.p0i64" + )] + fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t; } -vqnegq_s8_(a) + _vld2_dup_s64(a as _) } -/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v4i16")] - fn vqneg_s16_(a: int16x4_t) -> int16x4_t; - } -vqneg_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) } -/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v8i16")] - fn vqnegq_s16_(a: int16x8_t) -> int16x8_t; - } -vqnegq_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) } -/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vqneg.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v2i32")] - fn vqneg_s32_(a: int32x2_t) -> int32x2_t; - } -vqneg_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) } -/// Signed saturating negate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqneg))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqneg.v4i32")] - fn vqnegq_s32_(a: int32x4_t) -> int32x4_t; - } -vqnegq_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v8i8")] - fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vqsub_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v16i8")] - fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vqsubq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_dup_s32(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v4i16")] - fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vqsub_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_dup_s32(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v8i16")] - fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vqsubq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v2i32")] - fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vqsub_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v4i32")] - fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vqsubq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64) +#[doc = "Load single 2-element structure and replicate to all lanes of two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v1i64")] - fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - } -vqsub_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqsub.v2i64")] - fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32.p0i8")] + fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t; } -vqsubq_u64_(a, b) + _vld2_f32(a as *const i8, 4) } -/// Saturating subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> 
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v8i8")]
-        fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32.p0i8")]
+        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
     }
-vqsub_s8_(a, b)
+    _vld2q_f32(a as *const i8, 4)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v16i8")]
-        fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8.p0i8")]
+        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
     }
-vqsubq_s8_(a, b)
+    _vld2_s8(a as *const i8, 1)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v4i16")]
-        fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8.p0i8")]
+        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
     }
-vqsub_s16_(a, b)
+    _vld2q_s8(a as *const i8, 1)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v8i16")]
-        fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16.p0i8")]
+        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
     }
-vqsubq_s16_(a, b)
+    _vld2_s16(a as *const i8, 2)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v2i32")]
-        fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16.p0i8")]
+        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
     }
-vqsub_s32_(a, b)
+    _vld2q_s16(a as *const i8, 2)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v4i32")]
-        fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32.p0i8")]
+        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
     }
-vqsubq_s32_(a, b)
+    _vld2_s32(a as *const i8, 4)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v1i64")]
-        fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32.p0i8")]
+        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
     }
-vqsub_s64_(a, b)
+    _vld2q_s32(a as *const i8, 4)
 }
-/// Saturating subtract
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqsub))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqsub.v2i64")]
-        fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v2f32.p0v2f32"
+        )]
+        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
     }
-vqsubq_s64_(a, b)
+    _vld2_f32(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhadd.v8i8")]
-        fn vhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v4f32.p0v4f32"
+        )]
+        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
     }
-vhadd_u8_(a, b)
+    _vld2q_f32(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhadd.v16i8")]
-        fn vhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v8i8.p0v8i8"
+        )]
+        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
     }
-vhaddq_u8_(a, b)
+    _vld2_s8(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhadd.v4i16")]
-        fn vhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v16i8.p0v16i8"
+        )]
+        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
     }
-vhadd_u16_(a, b)
+    _vld2q_s8(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhadd.v8i16")]
-        fn vhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v4i16.p0v4i16"
+        )]
+        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
     }
-vhaddq_u16_(a, b)
+    _vld2_s16(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhadd.v2i32")]
-        fn vhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v8i16.p0v8i16"
+        )]
+        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
     }
-vhadd_u32_(a, b)
+    _vld2q_s16(a as _)
 }
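
// `vhadd_*` computes the lane-wise halving add (a + b) >> 1 in widened
// arithmetic, so the intermediate sum cannot overflow. A sketch, not part of
// the generated file (assumes AArch64; names are illustrative):
use core::arch::aarch64::*;

fn average_floor(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut out = [0u8; 8];
    // Safety: NEON is baseline on AArch64; each pointer covers exactly eight u8s.
    unsafe {
        vst1_u8(out.as_mut_ptr(), vhadd_u8(vld1_u8(a.as_ptr()), vld1_u8(b.as_ptr())));
    }
    out
}
// average_floor([255; 8], [255; 8]) == [255; 8]; a plain (a + b) / 2 in u8
// would wrap the sum and give 127 instead.
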
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhadd.v4i32")]
-        fn vhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v2i32.p0v2i32"
+        )]
+        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
     }
-vhaddq_u32_(a, b)
+    _vld2_s32(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shadd.v8i8")]
-        fn vhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v4i32.p0v4i32"
+        )]
+        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
     }
-vhadd_s8_(a, b)
+    _vld2q_s32(a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shadd.v16i8")]
-        fn vhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0i8"
+        )]
+        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
     }
-vhaddq_s8_(a, b)
+    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shadd.v4i16")]
-        fn vhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0i8"
+        )]
+        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
+            -> float32x4x2_t;
    }
-vhadd_s16_(a, b)
+    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
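
// The `_lane` variants take the lane index as a const generic (`LANE`), moved
// into value position by `rustc_legacy_const_generics` for C-style call syntax
// and validated at compile time by `static_assert_uimm_bits!`. A sketch, not
// part of the generated file (assumes AArch64; the helper name is illustrative):
use core::arch::aarch64::{float32x4x2_t, vld2q_lane_f32};

// Safety: caller must ensure `ptr` points to two readable, initialized f32s.
unsafe fn reload_lane3(ptr: *const f32, pair: float32x4x2_t) -> float32x4x2_t {
    // Lane 3 of both registers is replaced from memory; lanes 0..=2 are kept.
    // `vld2q_lane_f32::<4>` would be rejected at compile time by the assert.
    vld2q_lane_f32::<3>(ptr, pair)
}
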
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shadd.v8i16")]
-        fn vhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0i8"
+        )]
+        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
     }
-vhaddq_s16_(a, b)
+    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shadd.v2i32")]
-        fn vhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0i8"
+        )]
+        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
     }
-vhadd_s32_(a, b)
+    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-/// Halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shadd.v4i32")]
-        fn vhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0i8"
+        )]
+        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
     }
-vhaddq_s32_(a, b)
+    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-/// Rounding halving add
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urhadd))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urhadd.v8i8")]
-        fn vrhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0i8"
+        )]
+        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
     }
-vrhadd_u8_(a, b)
+    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
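
// `vrhadd_*` is the rounding variant of the halving add: (a + b + 1) >> 1
// rather than (a + b) >> 1. A sketch, not part of the generated file (assumes
// AArch64; names are illustrative):
use core::arch::aarch64::*;

fn average_round(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    let mut out = [0u8; 8];
    // Safety: NEON is baseline on AArch64; each pointer covers exactly eight u8s.
    unsafe {
        vst1_u8(out.as_mut_ptr(), vrhadd_u8(vld1_u8(a.as_ptr()), vld1_u8(b.as_ptr())));
    }
    out
}
// On lanes holding 1 and 2, vrhadd_u8 yields 2 where vhadd_u8 yields 1.
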
"neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urhadd.v16i8")] - fn vrhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0i8" + )] + fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; } -vrhaddq_u8_(a, b) + _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urhadd.v4i16")] - fn vrhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0i8")] + fn _vld2_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x2_t; } -vrhadd_u16_(a, b) + _vld2_lane_f32(a as _, b.0, b.1, LANE, 4) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] 
+#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urhadd.v8i16")] - fn vrhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0i8")] + fn _vld2q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x2_t; } -vrhaddq_u16_(a, b) + _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_uimm_bits!(LANE, 3); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urhadd.v2i32")] - fn vrhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0i8")] + fn _vld2q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x2_t; } -vrhadd_u32_(a, b) + _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urhadd.v4i32")] - fn vrhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0i8")] + fn _vld2q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x2_t; } -vrhaddq_u32_(a, b) + _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_uimm_bits!(LANE, 3); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srhadd.v8i8")] - fn vrhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0i8")] + fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) + -> int8x8x2_t; } -vrhadd_s8_(a, b) + _vld2_lane_s8(a as _, b.0, b.1, LANE, 1) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srhadd.v16i8")] - fn vrhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0i8")] + fn _vld2_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x2_t; } -vrhaddq_s8_(a, b) + _vld2_lane_s16(a as _, b.0, b.1, LANE, 2) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srhadd.v4i16")] - fn vrhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0i8")] + fn _vld2_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x2_t; } -vrhadd_s16_(a, b) + _vld2_lane_s32(a as _, b.0, b.1, LANE, 4) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] -#[cfg_attr(all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), assert_instr(srhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srhadd.v8i16")] - fn vrhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vrhaddq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_u8(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::(transmute(a), transmute(b))) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srhadd.v2i32")] - fn vrhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vrhadd_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_u16(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::(transmute(a), transmute(b))) } -/// Rounding halving add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srhadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srhadd.v4i32")] - fn vrhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vrhaddq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_lane_u16(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::(transmute(a), transmute(b))) } -/// Floating-point round to integral, to nearest with ties to even -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frintn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frintn.v2f32")] - fn vrndn_f32_(a: float32x2_t) -> float32x2_t; - } -vrndn_f32_(a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_u32(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld2_lane_s32::(transmute(a), transmute(b))) } -/// Floating-point round to integral, to nearest with ties to even -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frintn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frintn.v4f32")] - fn vrndnq_f32_(a: float32x4_t) -> float32x4_t; - } -vrndnq_f32_(a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_lane_u32(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2q_lane_s32::(transmute(a), transmute(b))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v8i8")] - fn vqadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vqadd_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v16i8")] - fn vqaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vqaddq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v4i16")] - fn vqadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vqadd_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", 
since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v8i16")] - fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vqaddq_u16_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_s64(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v2i32")] - fn vqadd_u32_(a: uint32x2_t, b: 
uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64.p0i8")] + fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t; } -vqadd_u32_(a, b) + _vld2_s64(a as *const i8, 8) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v4i32")] - fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld2.v1i64.p0v1i64" + )] + fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t; } -vqaddq_u32_(a, b) + _vld2_s64(a as _) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v1i64")] - fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - } -vqadd_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_s64(transmute(a))) } 
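// Editorial sketch (not part of the patch): the rewritten `vld2*` wrappers in this
// hunk share one shape. Unsigned and polynomial loads forward to the signed
// implementation through `transmute`, only `vld2_s64` keeps its own per-architecture
// LLVM binding, and the `_lane` variants take the lane index as a const generic
// range-checked by `static_assert_uimm_bits!`. A minimal caller might look like the
// following; the function name, the aarch64-only gate, and the choice of lane 3 are
// illustrative assumptions, not code from this diff.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn deinterleave_pairs(src: *const u16) -> core::arch::aarch64::uint16x8x2_t {
    use core::arch::aarch64::*;
    // vld2q_u16 reads 16 interleaved u16 values (a0 b0 a1 b1 ...) and
    // de-interleaves them: even elements land in `.0`, odd elements in `.1`.
    let v: uint16x8x2_t = vld2q_u16(src);
    // The `_lane` form reloads just one element pair from memory into lane 3 of
    // each register; the lane must fit in 3 bits (0..=7 for the q-form u16
    // vectors), which `static_assert_uimm_bits!(LANE, 3)` enforces at compile time.
    vld2q_lane_u16::<3>(src, v)
}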
-/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqadd.v2i64")] - fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - } -vqaddq_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_s8(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v8i8")] - fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vqadd_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_s8(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8) +#[doc = "Load 
multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v16i8")] - fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vqaddq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_s16(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v4i16")] - fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqadd_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_s16(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v8i16")] - fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqaddq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_s32(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v2i32")] - fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqadd_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_s32(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v4i32")] - fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vqaddq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_s8(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v1i64")] - fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vqadd_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_s8(transmute(a))) } -/// Saturating add -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqadd))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqadd.v2i64")] - fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } -vqaddq_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2) +#[doc = "Load multiple 2-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0i8")] - fn vld1_s8_x2_(a: *const i8) -> int8x8x2_t; - } -vld1_s8_x2_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld2) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0i16")] - fn vld1_s16_x2_(a: *const i16) -> int16x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v2f32.p0f32" + )] + fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t; } -vld1_s16_x2_(a) + _vld3_dup_f32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0i32")] - fn vld1_s32_x2_(a: *const i32) -> int32x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4f32.p0f32" + )] + fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t; } -vld1_s32_x2_(a) + _vld3q_dup_f32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0i64")] - fn vld1_s64_x2_(a: *const i64) -> int64x1x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8i8.p0i8" + )] + fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t; } -vld1_s64_x2_(a) + _vld3_dup_s8(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0i8")] - fn vld1q_s8_x2_(a: *const i8) -> int8x16x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v16i8.p0i8" + )] + fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t; } -vld1q_s8_x2_(a) + _vld3q_dup_s8(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0i16")] - fn vld1q_s16_x2_(a: *const i16) -> int16x8x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4i16.p0i16" + )] + fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t; } -vld1q_s16_x2_(a) + _vld3_dup_s16(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0i32")] - fn vld1q_s32_x2_(a: *const i32) -> int32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v8i16.p0i16" + )] + fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t; } -vld1q_s32_x2_(a) + _vld3q_dup_s16(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
vld1q_s64_x2(a: *const i64) -> int64x2x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0i64")] - fn vld1q_s64_x2_(a: *const i64) -> int64x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v2i32.p0i32" + )] + fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t; } -vld1q_s64_x2_(a) + _vld3_dup_s32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0i8")] - fn vld1_s8_x3_(a: *const i8) -> int8x8x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v4i32.p0i32" + )] + fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t; } -vld1_s8_x3_(a) + _vld3q_dup_s32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld3r))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0i16")] - fn vld1_s16_x3_(a: *const i16) -> int16x4x3_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3r.v1i64.p0i64" + )] + fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t; } -vld1_s16_x3_(a) + _vld3_dup_s64(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0i32")] - fn vld1_s32_x3_(a: *const i32) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0i8")] + fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t; } -vld1_s32_x3_(a) + _vld3_dup_f32(a as *const i8, 4) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, 
assert_instr(vld3))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0i64")] - fn vld1_s64_x3_(a: *const i64) -> int64x1x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0i8")] + fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t; } -vld1_s64_x3_(a) + _vld3q_dup_f32(a as *const i8, 4) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0i8")] - fn vld1q_s8_x3_(a: *const i8) -> int8x16x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0i8")] + fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t; } -vld1q_s8_x3_(a) + _vld3_dup_s8(a as *const i8, 1) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { extern 
"unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0i16")] - fn vld1q_s16_x3_(a: *const i16) -> int16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0i8")] + fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t; } -vld1q_s16_x3_(a) + _vld3q_dup_s8(a as *const i8, 1) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0i32")] - fn vld1q_s32_x3_(a: *const i32) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0i8")] + fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t; } -vld1q_s32_x3_(a) + _vld3_dup_s16(a as *const i8, 2) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1x3.v2i64.p0i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0i64")] - fn vld1q_s64_x3_(a: *const i64) -> int64x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0i8")] + fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t; } -vld1q_s64_x3_(a) + _vld3q_dup_s16(a as *const i8, 2) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0i8")] - fn vld1_s8_x4_(a: *const i8) -> int8x8x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0i8")] + fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t; } -vld1_s8_x4_(a) + _vld3_dup_s32(a as *const i8, 4) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = 
"arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0i16")] - fn vld1_s16_x4_(a: *const i16) -> int16x4x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0i8")] + fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t; } -vld1_s16_x4_(a) + _vld3q_dup_s32(a as *const i8, 4) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0i32")] - fn vld1_s32_x4_(a: *const i32) -> int32x2x4_t; - } -vld1_s32_x4_(a) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld1x4.v1i64.p0i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0i64")] - fn vld1_s64_x4_(a: *const i64) -> int64x1x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0i8")] + fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t; } -vld1_s64_x4_(a) + _vld3_dup_s64(a as *const i8, 8) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0i8")] - fn vld1q_s8_x4_(a: *const i8) -> int8x16x4_t; - } -vld1q_s8_x4_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0i16")] - fn vld1q_s16_x4_(a: *const i16) -> int16x8x4_t; - } -vld1q_s16_x4_(a) +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0i32")] - fn vld1q_s32_x4_(a: *const i32) -> int32x4x4_t; - } -vld1q_s32_x4_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0i64")] - fn vld1q_s64_x4_(a: *const i64) -> int64x2x4_t; - } 
-vld1q_s64_x4_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { - transmute(vld1_s8_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { - transmute(vld1_s16_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_dup_s32(transmute(a))) } -/// Load multiple 
single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { - transmute(vld1_s32_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_dup_s32(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { - transmute(vld1_s64_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { - transmute(vld1q_s8_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { - transmute(vld1q_s16_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2) +#[doc = "Load single 3-element structure and replicate to all lanes of three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { - transmute(vld1q_s32_x2(transmute(a))) +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { - transmute(vld1q_s64_x2(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v2f32.p0v2f32" + )] + fn _vld3_f32(ptr: *const float32x2_t) -> float32x2x3_t; + } + _vld3_f32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { - transmute(vld1_s8_x3(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4f32.p0v4f32" + )] + fn _vld3q_f32(ptr: *const float32x4_t) -> float32x4x3_t; + } + _vld3q_f32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3) +#[doc = "Load multiple 3-element 
structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { - transmute(vld1_s16_x3(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8i8.p0v8i8" + )] + fn _vld3_s8(ptr: *const int8x8_t) -> int8x8x3_t; + } + _vld3_s8(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { - transmute(vld1_s32_x3(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v16i8.p0v16i8" + )] + fn _vld3q_s8(ptr: *const int8x16_t) -> int8x16x3_t; + } + _vld3q_s8(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_u64_x3(a: *const u64) -> 
uint64x1x3_t { - transmute(vld1_s64_x3(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v4i16.p0v4i16" + )] + fn _vld3_s16(ptr: *const int16x4_t) -> int16x4x3_t; + } + _vld3_s16(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { - transmute(vld1q_s8_x3(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v8i16.p0v8i16" + )] + fn _vld3q_s16(ptr: *const int16x8_t) -> int16x8x3_t; + } + _vld3q_s16(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { - transmute(vld1q_s16_x3(transmute(a))) +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld3.v2i32.p0v2i32" + )] + fn _vld3_s32(ptr: *const int32x2_t) -> int32x2x3_t; + } + _vld3_s32(a as _) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3) +#[doc = "Load multiple 3-element 
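Unlike the `_dup` forms, these plain `vld3` loads de-interleave consecutive 3-element structures, placing the first field of every structure in the first register and so on; on aarch64 they lower to a single `LD3`. A hedged sketch of the effect (the wrapper `x_channel` and the XYZ interpretation are invented; an aarch64 target is assumed):

```rust
// Hypothetical example: pull the X field out of 4 packed XYZ vertices.
#[cfg(target_arch = "aarch64")]
fn x_channel(xyz: &[f32; 12]) -> core::arch::aarch64::float32x4_t {
    use core::arch::aarch64::vld3q_f32;
    // Safety: the array supplies the 12 consecutive f32s the intrinsic reads;
    // field .0 of the result holds xyz[0], xyz[3], xyz[6], xyz[9].
    unsafe { vld3q_f32(xyz.as_ptr()).0 }
}
```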
-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
-    transmute(vld1q_s64_x3(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0i8")]
+        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
+    }
+    _vld3_f32(a as *const i8, 4)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
-    transmute(vld1_s8_x4(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0i8")]
+        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
+    }
+    _vld3q_f32(a as *const i8, 4)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
-    transmute(vld1_s16_x4(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0i8")]
+        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
+    }
+    _vld3_s8(a as *const i8, 1)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
-    transmute(vld1_s32_x4(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0i8")]
+        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
+    }
+    _vld3q_s8(a as *const i8, 1)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
-    transmute(vld1_s64_x4(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0i8")]
+        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
+    }
+    _vld3_s16(a as *const i8, 2)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
-    transmute(vld1q_s8_x4(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0i8")]
+        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
+    }
+    _vld3q_s16(a as *const i8, 2)
+}
+
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0i8")]
+        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+    }
+    _vld3_s32(a as *const i8, 4)
+}
+
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0i8")]
+        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+    }
+    _vld3q_s32(a as *const i8, 4)
 }
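The ARM definitions mirror the aarch64 ones but bind the `llvm.arm.neon.vld3.*` intrinsics, which take an `i8` pointer plus an explicit alignment argument (the trailing 1, 2 or 4 above), so each public name exists twice behind opposite `cfg(target_arch)` gates. Usage is identical on both targets; a sketch with invented names, assuming aarch64 and the stable `vld3q_u8`/`vmaxvq_u8` intrinsics:

```rust
// Hypothetical example: per-channel maximum over 16 packed RGB pixels.
#[cfg(target_arch = "aarch64")]
fn channel_max(rgb: &[u8; 48]) -> (u8, u8, u8) {
    use core::arch::aarch64::{vld3q_u8, vmaxvq_u8};
    // Safety: 48 bytes = 16 pixels x 3 channels, exactly what vld3q_u8 reads.
    let planes = unsafe { vld3q_u8(rgb.as_ptr()) };
    // The load de-interleaves, so .0/.1/.2 are the R, G and B planes.
    unsafe { (vmaxvq_u8(planes.0), vmaxvq_u8(planes.1), vmaxvq_u8(planes.2)) }
}
```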
-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
-    transmute(vld1q_s16_x4(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0i8"
+        )]
+        fn _vld3_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x3_t;
+    }
+    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
-    transmute(vld1q_s32_x4(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0i8"
+        )]
+        fn _vld3q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x3_t;
+    }
+    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0i8")]
+        fn _vld3_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x3_t;
+    }
+    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
-    transmute(vld1q_s64_x4(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0i8"
+        )]
+        fn _vld3_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x3_t;
+    }
+    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
-    transmute(vld1_s8_x2(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0i8"
+        )]
+        fn _vld3_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x3_t;
+    }
+    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
-    transmute(vld1_s8_x3(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0i8"
+        )]
+        fn _vld3q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x3_t;
+    }
+    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
-    transmute(vld1_s8_x4(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0i8"
+        )]
+        fn _vld3_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x3_t;
+    }
+    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
-    transmute(vld1q_s8_x2(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0i8"
+        )]
+        fn _vld3q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x3_t;
+    }
+    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+}
+
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0i8")]
+        fn _vld3_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x3_t;
+    }
+    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+}
+
"stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0i8")] + fn _vld3_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x3_t; + } + _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} + +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0i8")] + fn _vld3q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x3_t; + } + _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2) +} + +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0i8")] + fn _vld3_lane_s32( + ptr: *const i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + n: i32, + size: i32, + ) -> int32x2x3_t; + } + _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) +} + +#[doc = "Load multiple 3-element structures to two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0i8")] + fn _vld3q_lane_s32( + ptr: *const i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + n: i32, + size: i32, + ) -> int32x4x3_t; + } + _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { - transmute(vld1q_s8_x3(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { - transmute(vld1q_s8_x4(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { - transmute(vld1_s16_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { - transmute(vld1_s16_x3(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { + static_assert_uimm_bits!(LANE, 1); + transmute(vld3_lane_s32::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { - transmute(vld1_s16_x4(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3q_lane_s32::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { - transmute(vld1q_s16_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { - transmute(vld1q_s16_x3(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { + static_assert_uimm_bits!(LANE, 2); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { - transmute(vld1q_s16_x4(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { + static_assert_uimm_bits!(LANE, 3); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { - transmute(vld1_s64_x2(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_s64(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's 
-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
-    transmute(vld1_s64_x3(transmute(a)))
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v1i64.p0v1i64"
+        )]
+        fn _vld3_s64(ptr: *const int64x1_t) -> int64x1x3_t;
+    }
+    _vld3_s64(a as _)
 }

-/// Load multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
-    transmute(vld1_s64_x4(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0i8")]
+        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
+    }
+    _vld3_s64(a as *const i8, 8)
 }
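// ---------------------------------------------------------------------------
// Editorial note, not part of the generated patch: vld3_s64 is intentionally
// defined twice above, once per architecture, because the underlying LLVM
// intrinsics differ (llvm.aarch64.neon.ld3.* takes a typed pointer, while the
// ARMv7 llvm.arm.neon.vld3.* takes an i8 pointer plus an element size). The
// mutually exclusive #[cfg] attributes mean a caller still sees one function;
// a hedged aarch64-only sketch with illustrative names:
// ---------------------------------------------------------------------------
#[cfg(target_arch = "aarch64")]
mod vld3_s64_example {
    use core::arch::aarch64::*;
    #[target_feature(enable = "neon")]
    pub unsafe fn load_three(p: *const i64) -> int64x1x3_t {
        vld3_s64(p) // resolves to the aarch64 definition at compile time
    }
}
// ---------------------------------------------------------------------------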
"aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { - transmute(vld1q_s64_x2(transmute(a))) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_s64(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { - transmute(vld1q_s64_x3(transmute(a))) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { - transmute(vld1q_s64_x4(transmute(a))) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0f32")] - fn vld1_f32_x2_(a: *const f32) -> float32x2x2_t; - } -vld1_f32_x2_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0f32")] - fn vld1q_f32_x2_(a: *const f32) -> float32x4x2_t; - } -vld1q_f32_x2_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch 
= "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_s16(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0f32")] - fn vld1_f32_x3_(a: *const f32) -> float32x2x3_t; - } -vld1_f32_x3_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_s32(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0f32")] - fn vld1q_f32_x3_(a: *const f32) -> float32x4x3_t; - } -vld1q_f32_x3_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_s32(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0f32")] - fn vld1_f32_x4_(a: *const f32) -> float32x2x4_t; - } -vld1_f32_x4_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_s8(transmute(a))) } -/// Load multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0f32")] - fn vld1q_f32_x4_(a: *const f32) -> float32x4x4_t; - } -vld1q_f32_x4_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_s8(transmute(a))) +} + +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_s16(transmute(a))) +} + +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_s16(transmute(a))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8) +#[doc = "Load multiple 3-element structures to three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8.p0i8")] - fn vld2_s8_(ptr: *const i8, size: i32) -> int8x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0i8")] + fn _vld3q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x3_t; } -vld2_s8_(a as *const i8, 1) + _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8) +#[doc = "Load single 4-element structure 
and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v8i8.p0v8i8")] - fn vld2_s8_(ptr: *const int8x8_t) -> int8x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0i8")] + fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t; } -vld2_s8_(a as _) + _vld4_dup_f32(a as *const i8, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg_attr(test, assert_instr(vld4))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16.p0i8")] - fn vld2_s16_(ptr: *const i8, size: i32) -> int16x4x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0i8")] + fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t; } -vld2_s16_(a as *const i8, 2) + _vld4q_dup_f32(a as *const i8, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v4i16.p0v4i16")] - fn vld2_s16_(ptr: *const int16x4_t) -> int16x4x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0i8")] + fn _vld4_dup_s8(ptr: *const i8, size: i32) -> 
int8x8x4_t; } -vld2_s16_(a as _) + _vld4_dup_s8(a as *const i8, 1) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg_attr(test, assert_instr(vld4))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32.p0i8")] - fn vld2_s32_(ptr: *const i8, size: i32) -> int32x2x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0i8")] + fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t; } -vld2_s32_(a as *const i8, 4) + _vld4q_dup_s8(a as *const i8, 1) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v2i32.p0v2i32")] - fn vld2_s32_(ptr: *const int32x2_t) -> int32x2x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0i8")] + fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t; } -vld2_s32_(a as _) + _vld4_dup_s16(a as *const i8, 2) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg_attr(test, assert_instr(vld4))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8.p0i8")] - fn vld2q_s8_(ptr: *const i8, size: i32) -> int8x16x2_t; + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0i8")] + fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t; } -vld2q_s8_(a as *const i8, 1) + _vld4q_dup_s16(a as *const i8, 2) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v16i8.p0v16i8")] - fn vld2q_s8_(ptr: *const int8x16_t) -> int8x16x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0i8")] + fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t; } -vld2q_s8_(a as _) + _vld4_dup_s32(a as *const i8, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg_attr(test, assert_instr(vld4))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16.p0i8")] - fn vld2q_s16_(ptr: *const i8, size: i32) -> int16x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0i8")] + fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t; } -vld2q_s16_(a as *const i8, 2) + _vld4q_dup_s32(a as *const i8, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { 
extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v8i16.p0v8i16")] - fn vld2q_s16_(ptr: *const int16x8_t) -> int16x8x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2f32.p0f32" + )] + fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t; } -vld2q_s16_(a as _) + _vld4_dup_f32(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32.p0i8")] - fn vld2q_s32_(ptr: *const i8, size: i32) -> int32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4f32.p0f32" + )] + fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t; } -vld2q_s32_(a as *const i8, 4) + _vld4q_dup_f32(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v4i32.p0v4i32")] - fn vld2q_s32_(ptr: *const int32x4_t) -> int32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i8.p0i8" + )] + fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t; } -vld2q_s32_(a as _) + _vld4_dup_s8(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] 
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64.p0i8")] - fn vld2_s64_(ptr: *const i8, size: i32) -> int64x1x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v16i8.p0i8" + )] + fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t; } -vld2_s64_(a as *const i8, 8) + _vld4q_dup_s8(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v1i64.p0v1i64")] - fn vld2_s64_(ptr: *const int64x1_t) -> int64x1x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i16.p0i16" + )] + fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t; } -vld2_s64_(a as _) + _vld4_dup_s16(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { - transmute(vld2_s8(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v8i16.p0i16" + )] + fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t; + } + _vld4q_dup_s16(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { - transmute(vld2_s16(transmute(a))) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v2i32.p0i32" + )] + fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t; + } + _vld4_dup_s32(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { - transmute(vld2_s32(transmute(a))) -} +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v4i32.p0i32" + )] + fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t; + } + _vld4q_dup_s32(a as _) +} + +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4r.v1i64.p0i64" + )] + fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t; + } + _vld4_dup_s64(a as _) +} + +#[doc = "Load single 4-element structure 
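// ---------------------------------------------------------------------------
// Editorial note, not part of the generated patch: the *_dup intrinsics load
// a single n-element structure and broadcast each element across every lane
// of its own register (the ld4r instruction on AArch64). A hedged
// aarch64-only sketch with illustrative names:
// ---------------------------------------------------------------------------
#[cfg(target_arch = "aarch64")]
mod vld4_dup_example {
    use core::arch::aarch64::*;
    /// Broadcasts q[0]..q[3] so that result.k holds q[k] in both lanes.
    #[target_feature(enable = "neon")]
    pub unsafe fn broadcast_quad(q: &[f32; 4]) -> float32x2x4_t {
        vld4_dup_f32(q.as_ptr())
    }
}
// ---------------------------------------------------------------------------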
+
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
+}
+
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(nop))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0i8")]
+        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+    }
+    _vld4_dup_s64(a as *const i8, 8)
+}

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_s32(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_dup_s32(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_dup_s32(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { - transmute(vld2_s64(transmute(a))) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32.p0i8")] - fn vld2_f32_(ptr: *const i8, size: i32) -> float32x2x2_t; - } -vld2_f32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32) +#[doc = "Load single 4-element structure and replicate to all lanes of four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v2f32.p0v2f32")] - fn vld2_f32_(ptr: *const float32x2_t) -> float32x2x2_t; - } -vld2_f32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4r) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch 
= "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32.p0i8")] - fn vld2q_f32_(ptr: *const i8, size: i32) -> float32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v2f32.p0v2f32" + )] + fn _vld4_f32(ptr: *const float32x2_t) -> float32x2x4_t; } -vld2q_f32_(a as *const i8, 4) + _vld4_f32(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2))] +#[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2.v4f32.p0v4f32")] - fn vld2q_f32_(ptr: *const float32x4_t) -> float32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4f32.p0v4f32" + )] + fn _vld4q_f32(ptr: *const float32x4_t) -> float32x4x4_t; } -vld2q_f32_(a as _) + _vld4q_f32(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0i8")] - fn vld2_dup_s8_(ptr: *const i8, size: i32) -> int8x8x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8i8.p0v8i8" + )] + fn _vld4_s8(ptr: *const int8x8_t) -> int8x8x4_t; } -vld2_dup_s8_(a as *const i8, 1) + _vld4_s8(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v8i8.p0i8")] - fn vld2_dup_s8_(ptr: *const i8) -> int8x8x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v16i8.p0v16i8" + )] + fn _vld4q_s8(ptr: *const int8x16_t) -> int8x16x4_t; } -vld2_dup_s8_(a as _) + _vld4q_s8(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0i8")] - fn vld2_dup_s16_(ptr: *const i8, size: i32) -> int16x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4i16.p0v4i16" + )] + fn _vld4_s16(ptr: *const int16x4_t) -> int16x4x4_t; } -vld2_dup_s16_(a as *const i8, 2) + _vld4_s16(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] 
#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v4i16.p0i16")] - fn vld2_dup_s16_(ptr: *const i16) -> int16x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v8i16.p0v8i16" + )] + fn _vld4q_s16(ptr: *const int16x8_t) -> int16x8x4_t; } -vld2_dup_s16_(a as _) + _vld4q_s16(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0i8")] - fn vld2_dup_s32_(ptr: *const i8, size: i32) -> int32x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v2i32.p0v2i32" + )] + fn _vld4_s32(ptr: *const int32x2_t) -> int32x2x4_t; } -vld2_dup_s32_(a as *const i8, 4) + _vld4_s32(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg(not(target_arch = "arm"))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(ld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v2i32.p0i32")] - fn vld2_dup_s32_(ptr: *const i32) -> int32x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v4i32.p0v4i32" + )] + fn _vld4q_s32(ptr: *const int32x4_t) -> int32x4x4_t; } -vld2_dup_s32_(a as _) + _vld4q_s32(a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8) 
+#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0i8")] - fn vld2q_dup_s8_(ptr: *const i8, size: i32) -> int8x16x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0i8")] + fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t; } -vld2q_dup_s8_(a as *const i8, 1) + _vld4_f32(a as *const i8, 4) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v16i8.p0i8")] - fn vld2q_dup_s8_(ptr: *const i8) -> int8x16x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0i8")] + fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t; } -vld2q_dup_s8_(a as _) + _vld4q_f32(a as *const i8, 4) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0i8")] - fn vld2q_dup_s16_(ptr: *const i8, size: i32) -> int16x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0i8")] + fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t; } -vld2q_dup_s16_(a as *const i8, 2) + _vld4_s8(a as *const i8, 1) } -/// Load 
single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v8i16.p0i16")] - fn vld2q_dup_s16_(ptr: *const i16) -> int16x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0i8")] + fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t; } -vld2q_dup_s16_(a as _) + _vld4q_s8(a as *const i8, 1) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0i8")] - fn vld2q_dup_s32_(ptr: *const i8, size: i32) -> int32x4x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0i8")] + fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t; } -vld2q_dup_s32_(a as *const i8, 4) + _vld4_s16(a as *const i8, 2) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), 
link_name = "llvm.aarch64.neon.ld2r.v4i32.p0i32")] - fn vld2q_dup_s32_(ptr: *const i32) -> int32x4x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0i8")] + fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t; } -vld2q_dup_s32_(a as _) + _vld4q_s16(a as *const i8, 2) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0i8")] - fn vld2_dup_s64_(ptr: *const i8, size: i32) -> int64x1x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0i8")] + fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t; } -vld2_dup_s64_(a as *const i8, 8) + _vld4_s32(a as *const i8, 4) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v1i64.p0i64")] - fn vld2_dup_s64_(ptr: *const i64) -> int64x1x2_t; - } -vld2_dup_s64_(a as _) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { - transmute(vld2_dup_s8(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { - transmute(vld2_dup_s16(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { - transmute(vld2_dup_s32(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { - transmute(vld2q_dup_s8(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { - transmute(vld2q_dup_s16(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { - transmute(vld2q_dup_s32(transmute(a))) -} - -/// Load single 2-element 
structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { - transmute(vld2_dup_s8(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { - transmute(vld2_dup_s16(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { - transmute(vld2q_dup_s8(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { - transmute(vld2q_dup_s16(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { - transmute(vld2_dup_s64(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { - transmute(vld2_dup_s64(transmute(a))) -} - -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32) -#[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0i8")] - fn vld2_dup_f32_(ptr: *const i8, size: i32) -> float32x2x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0i8")] + fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t; } -vld2_dup_f32_(a as *const i8, 4) + _vld4q_s32(a as *const i8, 4) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v2f32.p0f32")] - fn vld2_dup_f32_(ptr: *const f32) -> float32x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0i8" + )] + fn _vld4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *const i8, + ) -> float32x2x4_t; } -vld2_dup_f32_(a as _) + _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load 
single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0i8")] - fn vld2q_dup_f32_(ptr: *const i8, size: i32) -> float32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0i8" + )] + fn _vld4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *const i8, + ) -> float32x4x4_t; } -vld2q_dup_f32_(a as *const i8, 4) + _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load single 2-element structure and replicate to all lanes of two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2r))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { - #[allow(improper_ctypes)] +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_uimm_bits!(LANE, 3); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2r.v4f32.p0f32")] - fn vld2q_dup_f32_(ptr: *const f32) -> float32x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0i8" + )] + fn _vld4_lane_s8( + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i64, + ptr: *const i8, + ) -> int8x8x4_t; } -vld2q_dup_f32_(a as _) + _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, 
assert_instr(vld2, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0i8")] - fn vld2_lane_s8_(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) -> int8x8x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0i8" + )] + fn _vld4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *const i8, + ) -> int16x4x4_t; } -vld2_lane_s8_(a as _, b.0, b.1, LANE, 1) + _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0i8")] - fn vld2_lane_s8_(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0i8" + )] + fn _vld4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *const i8, + ) -> int16x8x4_t; } -vld2_lane_s8_(b.0, b.1, LANE as i64, a as _) + _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { - static_assert_uimm_bits!(LANE, 2); - 
#[allow(improper_ctypes)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0i8")] - fn vld2_lane_s16_(ptr: *const i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32) -> int16x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0i8" + )] + fn _vld4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *const i8, + ) -> int32x2x4_t; } -vld2_lane_s16_(a as _, b.0, b.1, LANE, 2) + _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s16(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0i8")] - fn vld2_lane_s16_(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0i8" + )] + fn _vld4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *const i8, + ) -> int32x4x4_t; } -vld2_lane_s16_(b.0, b.1, LANE as i64, a as _) + _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0i8")] - fn vld2_lane_s32_(ptr: *const i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32) -> int32x2x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0i8")] + fn 
_vld4_lane_f32( + ptr: *const i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ) -> float32x2x4_t; } -vld2_lane_s32_(a as _, b.0, b.1, LANE, 4) + vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_s32(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0i8")] - fn vld2_lane_s32_(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0i8")] + fn _vld4q_lane_f32( + ptr: *const i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ) -> float32x4x4_t; } -vld2_lane_s32_(b.0, b.1, LANE as i64, a as _) + vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { +pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0i8")] - fn vld2q_lane_s16_(ptr: *const i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32) -> int16x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0i8")] + fn _vld4_lane_s8( + ptr: *const i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ) -> int8x8x4_t; } -vld2q_lane_s16_(a as _, b.0, b.1, LANE, 2) + vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16) 
+#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] #[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_s16(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0i8")] - fn vld2q_lane_s16_(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0i8")] + fn _vld4_lane_s16( + ptr: *const i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ) -> int16x4x4_t; } -vld2q_lane_s16_(b.0, b.1, LANE as i64, a as _) + vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] +#[target_feature(enable = "neon,v7")] #[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0i8")] + fn _vld4q_lane_s16( + ptr: *const i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ) -> int16x8x4_t; + } + vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} + +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[cfg(target_arch = "arm")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_s32(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_uimm_bits!(LANE, 1); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0i8")] - fn vld2q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32) -> int32x4x2_t; + 
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0i8")]
-        fn vld2q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32) -> int32x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0i8")]
+        fn _vld4_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x4_t;
     }
-    vld2q_lane_s32_(a as _, b.0, b.1, LANE, 4)
+    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0i8")]
-        fn vld2q_lane_s32_(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0i8")]
+        fn _vld4q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x4_t;
     }
-    vld2q_lane_s32_(b.0, b.1, LANE as i64, a as _)
+    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
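A note on the lane index: `LANE` is a const generic, and `static_assert_uimm_bits!` rejects out-of-range values at compile time. An editorial sketch (the function name is hypothetical):

```rust
// vld4_lane_u8 operates on 8-lane vectors, so LANE must fit in 3 bits (0..=7).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn lane_bounds_demo(p: *const u8, regs: core::arch::aarch64::uint8x8x4_t) {
    use core::arch::aarch64::vld4_lane_u8;
    let _ok = vld4_lane_u8::<7>(p, regs); // 7 fits in 3 bits: compiles
    // let _bad = vld4_lane_u8::<8>(p, regs); // 8 does not: compile-time error
}
```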
-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
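Why the transmute-based wrappers above are sound is worth one editorial sketch: the unsigned and signed x4 tuple types have identical size and layout, and the load moves raw bytes without interpreting them, so reinterpreting input and output preserves the result. The helper name is hypothetical and `LANE` is fixed to 1 only for illustration:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn u16_wrapper_demo(
    p: *const u16,
    b: core::arch::aarch64::uint16x4x4_t,
) -> core::arch::aarch64::uint16x4x4_t {
    use core::arch::aarch64::vld4_lane_s16;
    // Equivalent to vld4_lane_u16::<1>(p, b), spelled via the signed core.
    core::mem::transmute(vld4_lane_s16::<1>(p.cast::<i16>(), core::mem::transmute(b)))
}
```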
-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
+    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
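As a complement to the lane variants, an editorial sketch of the whole-register form (the helper name is hypothetical): `vld4_u8`, defined further down in this diff, deinterleaves 32 consecutive bytes into four registers in one instruction. For the 64-bit variants that follow (`vld4_s64`/`vld4_u64`/`vld4_p64`), each register holds a single element, so nothing is interleaved and the test attributes assert a plain load (`nop`) rather than LD4/VLD4.

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn deinterleave_rgba(px: &[u8; 32]) -> core::arch::aarch64::uint8x8x4_t {
    use core::arch::aarch64::vld4_u8;
    // .0 = the 8 R bytes, .1 = the 8 G bytes, .2 = the 8 B bytes, .3 = the 8 A bytes
    vld4_u8(px.as_ptr())
}
```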
-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0i8")]
-        fn vld2_lane_f32_(ptr: *const i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32) -> float32x2x2_t;
-    }
-    vld2_lane_f32_(a as _, b.0, b.1, LANE, 4)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }

-/// Load multiple 2-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(not(target_arch = "arm"))]
 #[stable(feature =
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2_lane_f32(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0i8")] - fn vld2_lane_f32_(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ld4.v1i64.p0v1i64" + )] + fn _vld4_s64(ptr: *const int64x1_t) -> int64x1x4_t; } -vld2_lane_f32_(b.0, b.1, LANE as i64, a as _) + _vld4_s64(a as _) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(target_arch = "arm")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0i8")] - fn vld2q_lane_f32_(ptr: *const i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32) -> float32x4x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0i8")] + fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t; } -vld2q_lane_f32_(a as _, b.0, b.1, LANE, 4) + _vld4_s64(a as *const i8, 8) } -/// Load multiple 2-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld2q_lane_f32(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0i8")] - fn vld2q_lane_f32_(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) -> float32x4x2_t; - } -vld2q_lane_f32_(b.0, b.1, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_s64(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0i8")] - fn vld3_s8_(ptr: *const i8, size: i32) -> int8x8x3_t; - } -vld3_s8_(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_s8(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v8i8.p0v8i8")] - fn vld3_s8_(ptr: *const int8x8_t) -> int8x8x3_t; - } -vld3_s8_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_s8(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] 
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0i8")] - fn vld3_s16_(ptr: *const i8, size: i32) -> int16x4x3_t; - } -vld3_s16_(a as *const i8, 2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_s16(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v4i16.p0v4i16")] - fn vld3_s16_(ptr: *const int16x4_t) -> int16x4x3_t; - } -vld3_s16_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_s16(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0i8")] - fn vld3_s32_(ptr: *const i8, size: i32) -> int32x2x3_t; - } -vld3_s32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_s32(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v2i32.p0v2i32")] - fn vld3_s32_(ptr: *const int32x2_t) -> int32x2x3_t; - } -vld3_s32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_s32(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0i8")] - fn vld3q_s8_(ptr: *const i8, size: i32) -> int8x16x3_t; - } -vld3q_s8_(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_s8(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
-#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v16i8.p0v16i8")] - fn vld3q_s8_(ptr: *const int8x16_t) -> int8x16x3_t; - } -vld3q_s8_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_s8(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0i8")] - fn vld3q_s16_(ptr: *const i8, size: i32) -> int16x8x3_t; - } -vld3q_s16_(a as *const i8, 2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_s16(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16) +#[doc = "Load multiple 4-element structures to four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v8i16.p0v8i16")] - fn vld3q_s16_(ptr: *const int16x8_t) -> int16x8x3_t; - } -vld3q_s16_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(ld4) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_s16(transmute(a))) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0i8")] - fn vld3q_s32_(ptr: *const i8, size: i32) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmax.v2f32" + )] + fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } -vld3q_s32_(a as *const i8, 4) + _vmax_f32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v4i32.p0v4i32")] - fn vld3q_s32_(ptr: *const int32x4_t) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.fmax.v4f32" + )] + fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } -vld3q_s32_(a as _) + _vmaxq_f32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0i8")] - fn vld3_s64_(ptr: *const i8, size: i32) -> int64x1x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v8i8" + )] + fn _vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vld3_s64_(a as *const i8, 8) + _vmax_s8(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v1i64.p0v1i64")] - fn vld3_s64_(ptr: *const int64x1_t) -> int64x1x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v16i8" + )] + fn _vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vld3_s64_(a as _) + _vmaxq_s8(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { - transmute(vld3_s8(transmute(a))) -} - -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { - transmute(vld3_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v4i16" + )] + fn _vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vmax_s16(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { - transmute(vld3_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v8i16" + )] + fn _vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vmaxq_s16(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { - transmute(vld3q_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v2i32" + )] + fn _vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vmax_s32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { - transmute(vld3q_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + 
extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smax.v4i32" + )] + fn _vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vmaxq_s32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { - transmute(vld3q_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v8i8" + )] + fn _vmax_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vmax_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { - transmute(vld3_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.umax.v16i8" + )] + fn _vmaxq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vmaxq_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { - transmute(vld3_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v4i16" + )] + fn _vmax_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vmax_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { - transmute(vld3q_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v8i16" + )] + fn _vmaxq_u16(a: int16x8_t, b: int16x8_t) 
-> int16x8_t; + } + _vmaxq_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { - transmute(vld3q_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v2i32" + )] + fn _vmax_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vmax_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64) +#[doc = "Maximum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { - transmute(vld3_s64(transmute(a))) -} - -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { - transmute(vld3_s64(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umax) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umax.v4i32" + )] + fn _vmaxq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32) +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0i8")] - fn vld3_f32_(ptr: *const i8, size: i32) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v2f32" + )] + fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } -vld3_f32_(a as *const i8, 4) + _vmaxnm_f32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32) +#[doc = "Floating-point Maximum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmaxnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3.v2f32.p0v2f32")] - fn vld3_f32_(ptr: *const float32x2_t) -> float32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmaxnm.v4f32" + )] + fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } -vld3_f32_(a as _) + _vmaxnmq_f32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0i8")] - fn vld3q_f32_(ptr: *const i8, size: i32) -> float32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v2f32" + )] + fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } -vld3q_f32_(a as *const i8, 4) + _vmin_f32(a, b) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = 
"llvm.aarch64.neon.ld3.v4f32.p0v4f32")] - fn vld3q_f32_(ptr: *const float32x4_t) -> float32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fmin.v4f32" + )] + fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } -vld3q_f32_(a as _) + _vminq_f32(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0i8")] - fn vld3_dup_s8_(ptr: *const i8, size: i32) -> int8x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v8i8" + )] + fn _vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vld3_dup_s8_(a as *const i8, 1) + _vmin_s8(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v8i8.p0i8")] - fn vld3_dup_s8_(ptr: *const i8) -> int8x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v16i8" + )] + fn _vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vld3_dup_s8_(a as _) + _vminq_s8(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0i8")] - fn vld3_dup_s16_(ptr: *const i8, size: i32) -> int16x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v4i16" + )] + fn _vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vld3_dup_s16_(a as *const i8, 2) + _vmin_s16(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v4i16.p0i16")] - fn vld3_dup_s16_(ptr: *const i16) -> int16x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v8i16" + )] + fn _vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vld3_dup_s16_(a as _) + 
_vminq_s16(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0i8")] - fn vld3_dup_s32_(ptr: *const i8, size: i32) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v2i32" + )] + fn _vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vld3_dup_s32_(a as *const i8, 4) + _vmin_s32(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v2i32.p0i32")] - fn vld3_dup_s32_(ptr: *const i32) -> int32x2x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.smin.v4i32" + )] + fn _vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vld3_dup_s32_(a as _) + _vminq_s32(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0i8")] - fn vld3q_dup_s8_(ptr: *const i8, size: i32) -> int8x16x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v8i8" + )] + fn _vmin_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vld3q_dup_s8_(a as *const i8, 1) + _vmin_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v16i8.p0i8")] - fn vld3q_dup_s8_(ptr: *const i8) -> int8x16x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v16i8" + )] + fn _vminq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vld3q_dup_s8_(a as _) + _vminq_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16) +#[doc = "Minimum 
(vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0i8")] - fn vld3q_dup_s16_(ptr: *const i8, size: i32) -> int16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v4i16" + )] + fn _vmin_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vld3q_dup_s16_(a as *const i8, 2) + _vmin_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v8i16.p0i16")] - fn vld3q_dup_s16_(ptr: *const i16) -> int16x8x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v8i16" + )] + fn _vminq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vld3q_dup_s16_(a as _) + _vminq_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0i8")] - fn vld3q_dup_s32_(ptr: *const i8, size: i32) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v2i32" + )] + fn _vmin_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vld3q_dup_s32_(a as *const i8, 4) + _vmin_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32) +#[doc = "Minimum (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umin) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v4i32.p0i32")] - fn vld3q_dup_s32_(ptr: *const i32) -> int32x4x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.umin.v4i32" + )] + fn _vminq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vld3q_dup_s32_(a as _) + _vminq_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64) +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0i8")] - fn vld3_dup_s64_(ptr: *const i8, size: i32) -> int64x1x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v2f32" + )] + fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } -vld3_dup_s64_(a as *const i8, 8) + _vminnm_f32(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64) +#[doc = "Floating-point Minimum Number (vector)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fminnm) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v1i64.p0i64")] - fn vld3_dup_s64_(ptr: *const i64) -> int64x1x3_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.fminnm.v4f32" + )] + fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; } -vld3_dup_s64_(a as _) + _vminnmq_f32(a, b) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8) +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { - transmute(vld3_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_add(a, simd_mul(b, c)) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16) +#[doc = "Floating-point multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { - transmute(vld3_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_add(a, simd_mul(b, c)) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = 
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { - transmute(vld3_dup_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { - transmute(vld3q_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { - transmute(vld3q_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 
-/// Load single 3-element structure and replicate to all lanes of three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_f32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-/// Load single 3-element structure and replicate to all lanes of three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_dup_s32(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_f32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
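// NOTE (editorial, not generated code): the `q` forms accumulate into a
// 128-bit vector, while `lane` vs `laneq` selects whether the scalar comes
// from a 64-bit or a 128-bit `c` -- hence the different bounds asserted above
// (1 bit for two lanes, 2 bits for four). Sketch assuming aarch64;
// names and values are illustrative.
#[cfg(target_arch = "aarch64")]
fn demo_vmlaq_lane_f32() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_f32(0.5);
        let b = vdupq_n_f32(3.0);
        let c2 = vdup_n_f32(2.0); // two lanes to pick from
        let c4 = vsetq_lane_f32::<3>(10.0, vdupq_n_f32(1.0)); // [1, 1, 1, 10]
        let r1 = vmlaq_lane_f32::<1>(acc, b, c2); // 0.5 + 3.0 * 2.0 = 6.5
        let r2 = vmlaq_laneq_f32::<3>(acc, b, c4); // 0.5 + 3.0 * 10.0 = 30.5
        assert_eq!(vgetq_lane_f32::<0>(r1), 6.5);
        assert_eq!(vgetq_lane_f32::<0>(r2), 30.5);
    }
}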
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmla_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { - transmute(vld3_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_lane_u16( + a: uint16x4_t, + b: uint16x4_t, + c: uint16x4_t, +) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmla_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { - transmute(vld3q_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] 
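// NOTE (editorial, not generated code): the integer variants follow the same
// pattern and are expected to lower to a lane-indexed `mla` on AArch64. A
// sketch with illustrative values, assuming aarch64:
#[cfg(target_arch = "aarch64")]
fn demo_vmla_lane_s16() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s16(100);
        let b = vdup_n_s16(2);
        let c = vset_lane_s16::<3>(7, vdup_n_s16(1)); // [1, 1, 1, 7]
        // a + b * c[3] = 100 + 2 * 7 = 114 in every lane.
        let r = vmla_lane_s16::<3>(a, b, c);
        assert_eq!(vget_lane_s16::<0>(r), 114);
    }
}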
-/// Load single 3-element structure and replicate to all lanes of three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_laneq_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-/// Load single 3-element structure and replicate to all lanes of three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_laneq_u16<const LANE: i32>(
+    a: uint16x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
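// NOTE (editorial, not generated code): `laneq` widens only the lane source;
// with a `uint16x8_t` third operand the index may address all eight lanes
// (hence the 3-bit assert above). Illustrative sketch assuming aarch64:
#[cfg(target_arch = "aarch64")]
fn demo_vmla_laneq_u16() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_u16(1);
        let b = vdup_n_u16(3);
        let c = vsetq_lane_u16::<7>(5, vdupq_n_u16(0)); // lane 7 holds 5
        // a + b * c[7] = 1 + 3 * 5 = 16 in every lane.
        let r = vmla_laneq_u16::<7>(a, b, c);
        assert_eq!(vget_lane_u16::<0>(r), 16);
    }
}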
"111800") +)] +pub unsafe fn vmlaq_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x4_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { - transmute(vld3_dup_s64(transmute(a))) -} - -/// Load single 3-element structure and replicate to all lanes of three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_lane_u16( + a: uint16x8_t, + b: uint16x8_t, + c: uint16x4_t, +) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmlaq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0i8")] - fn vld3_dup_f32_(ptr: *const i8, size: i32) -> float32x2x3_t; - } -vld3_dup_f32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_laneq_s16( + a: int16x8_t, + b: int16x8_t, + c: 
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0i8")]
-        fn vld3_dup_f32_(ptr: *const i8, size: i32) -> float32x2x3_t;
-    }
-vld3_dup_f32_(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_laneq_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
+}
+
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(
+    a: uint16x8_t,
+    b: uint16x8_t,
+    c: uint16x8_t,
+) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
+}
+
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
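// NOTE (editorial, not generated code): the 32-bit forms have only two lanes,
// so `LANE` is a single bit, and the accumulate is an ordinary wrapping
// vector add like the other integer variants. Illustrative sketch assuming
// aarch64:
#[cfg(target_arch = "aarch64")]
fn demo_vmla_lane_s32() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s32(10);
        let b = vdup_n_s32(-3);
        let c = vset_lane_s32::<1>(8, vdup_n_s32(2)); // [2, 8]
        // a + b * c[1] = 10 + (-3) * 8 = -14 in both lanes.
        let r = vmla_lane_s32::<1>(a, b, c);
        assert_eq!(vget_lane_s32::<0>(r), -14);
    }
}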
-/// Load single 3-element structure and replicate to all lanes of three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3r.v2f32.p0f32")]
-        fn vld3_dup_f32_(ptr: *const f32) -> float32x2x3_t;
-    }
-vld3_dup_f32_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_lane_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-/// Load single 3-element structure and replicate to all lanes of three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0i8")]
-        fn vld3q_dup_f32_(ptr: *const i8, size: i32) -> float32x4x3_t;
-    }
-vld3q_dup_f32_(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
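// NOTE (editorial, not generated code): `vmla_laneq_s32` mixes widths -- the
// 64-bit `a`/`b` accumulate a scalar drawn from a 128-bit `c`. An index such
// as `vmla_laneq_s32::<4>` would be rejected at compile time by the
// `static_assert_uimm_bits!(LANE, 2)` above. Illustrative sketch assuming
// aarch64:
#[cfg(target_arch = "aarch64")]
fn demo_vmla_laneq_s32() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s32(0);
        let b = vdup_n_s32(5);
        let c = vsetq_lane_s32::<3>(9, vdupq_n_s32(1)); // [1, 1, 1, 9]
        // a + b * c[3] = 0 + 5 * 9 = 45 in both lanes.
        let r = vmla_laneq_s32::<3>(a, b, c);
        assert_eq!(vget_lane_s32::<1>(r), 45);
    }
}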
+pub unsafe fn vmla_laneq_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+    c: uint32x4_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }

-/// Load multiple 3-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0i8")]
-        fn vld3_lane_s8_(ptr: *const i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32) -> int8x8x3_t;
-    }
-vld3_lane_s8_(a as _, b.0, b.1, b.2, LANE, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Load multiple 3-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0i8")]
-        fn vld3_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *const i8) -> int8x8x3_t;
-    }
-vld3_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
issue = "111800") +)] +pub unsafe fn vmlaq_lane_u32( + a: uint32x4_t, + b: uint32x4_t, + c: uint32x2_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlaq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_laneq_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, +) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0i8")] - fn vld3_lane_s16_(ptr: *const i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i32, size: i32) -> int16x4x3_t; - } -vld3_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2) + vmlaq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_laneq_u32( + a: uint32x4_t, + b: uint32x4_t, + c: uint32x4_t, +) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = 
"llvm.aarch64.neon.ld3lane.v4i16.p0i8")] - fn vld3_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *const i8) -> int16x4x3_t; - } -vld3_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _) + vmlaq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0i8")] - fn vld3_lane_s32_(ptr: *const i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i32, size: i32) -> int32x2x3_t; - } -vld3_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmla_f32(a, b, vdup_n_f32(c)) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0i8")] - fn vld3_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *const i8) -> int32x2x3_t; - } -vld3_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t 
+    vmlaq_f32(a, b, vdupq_n_f32(c))
 }

-/// Load multiple 3-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0i8")]
-        fn vld3q_lane_s16_(ptr: *const i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i32, size: i32) -> int16x8x3_t;
-    }
-vld3q_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmla_s16(a, b, vdup_n_s16(c))
 }

-/// Load multiple 3-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0i8")]
-        fn vld3q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *const i8) -> int16x8x3_t;
-    }
-vld3q_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlaq_s16(a, b, vdupq_n_s16(c))
 }

-/// Load multiple 3-element structures to two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)
+#[doc = "Vector multiply accumulate with scalar"]
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0i8")] - fn vld3q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i32, size: i32) -> int32x4x3_t; - } -vld3q_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmla_u16(a, b, vdup_n_u16(c)) } -/// Load multiple 3-element structures to two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0i8")] - fn vld3q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *const i8) -> int32x4x3_t; - } -vld3q_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlaq_u16(a, b, vdupq_n_u16(c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3_lane_s8::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmla_s32(a, b, vdup_n_s32(c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3_lane_s16::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlaq_s32(a, b, vdupq_n_s32(c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32) +#[doc = "Vector multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmla_u32(a, b, vdup_n_u32(c))
 }

-/// Load multiple 3-element structures to three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlaq_u32(a, b, vdupq_n_u32(c))
 }

-/// Load multiple 3-element structures to three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(
"aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8) +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld3_lane_s8::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16) +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld3_lane_s16::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_add(a, simd_mul(b, c)) } -/// Load 
-/// Load multiple 3-element structures to three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    simd_add(a, simd_mul(b, c))
 }

-/// Load multiple 3-element structures to three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0i8")]
-        fn vld3_lane_f32_(ptr: *const i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i32, size: i32) -> float32x2x3_t;
-    }
-vld3_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    simd_add(a, simd_mul(b, c))
 }

-/// Load multiple 3-element structures to three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0i8")] - fn vld3_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *const i8) -> float32x2x3_t; - } -vld3_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32) +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0i8")] - fn vld3q_lane_f32_(ptr: *const i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i32, size: i32) -> float32x4x3_t; - } -vld3q_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 3-element structures to three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32) +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = 
"1.59.0")] -pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0i8")] - fn vld3q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *const i8) -> float32x4x3_t; - } -vld3q_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8) +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0i8")] - fn vld4_s8_(ptr: *const i8, size: i32) -> int8x8x4_t; - } -vld4_s8_(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mla) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_add(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8) +#[doc = "Multiply-add to accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v8i8.p0v8i8")] - fn vld4_s8_(ptr: *const int8x8_t) -> int8x8x4_t; - } -vld4_s8_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    simd_add(a, simd_mul(b, c))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0i8")]
-        fn vld4_s16_(ptr: *const i8, size: i32) -> int16x4x4_t;
-    }
-vld4_s16_(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    simd_add(a, simd_mul(b, c))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v4i16.p0v4i16")]
-        fn vld4_s16_(ptr: *const int16x4_t) -> int16x4x4_t;
-    }
-vld4_s16_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    simd_add(a, simd_mul(b, c))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0i8")]
-        fn vld4_s32_(ptr: *const i8, size: i32) -> int32x2x4_t;
-    }
-vld4_s32_(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_lane_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v2i32.p0v2i32")]
-        fn vld4_s32_(ptr: *const int32x2_t) -> int32x2x4_t;
-    }
-vld4_s32_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_laneq_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
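+// Illustrative usage sketch, not generator output (assumes
+// `core::arch::aarch64::*` and NEON): the widening `vmlal_*` forms
+// accumulate double-width products, so `vmlal_lane_s16` above yields
+// `a[i] + (b[i] as i32) * (c[LANE] as i32)` per lane.
+//
+// ```
+// unsafe fn widening_mla(acc: int32x4_t, x: int16x4_t, y: int16x4_t) -> int32x4_t {
+//     // acc[i] + widened x[i] * y[0]
+//     vmlal_lane_s16::<0>(acc, x, y)
+// }
+// ```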
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0i8")]
-        fn vld4q_s8_(ptr: *const i8, size: i32) -> int8x16x4_t;
-    }
-vld4q_s8_(a as *const i8, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_lane_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v16i8.p0v16i8")]
-        fn vld4q_s8_(ptr: *const int8x16_t) -> int8x16x4_t;
-    }
-vld4q_s8_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_laneq_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0i8")]
-        fn vld4q_s16_(ptr: *const i8, size: i32) -> int16x8x4_t;
-    }
-vld4q_s16_(a as *const i8, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_lane_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v8i16.p0v8i16")]
-        fn vld4q_s16_(ptr: *const int16x8_t) -> int16x8x4_t;
-    }
-vld4q_s16_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_laneq_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0i8")]
-        fn vld4q_s32_(ptr: *const i8, size: i32) -> int32x4x4_t;
-    }
-vld4q_s32_(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_lane_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v4i32.p0v4i32")]
-        fn vld4q_s32_(ptr: *const int32x4_t) -> int32x4x4_t;
-    }
-vld4q_s32_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlal, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_laneq_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x4_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(nop))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0i8")]
-        fn vld4_s64_(ptr: *const i8, size: i32) -> int64x1x4_t;
-    }
-vld4_s64_(a as *const i8, 8)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlal_s16(a, b, vdup_n_s16(c))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v1i64.p0v1i64")]
-        fn vld4_s64_(ptr: *const int64x1_t) -> int64x1x4_t;
-    }
-vld4_s64_(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlal_s32(a, b, vdup_n_s32(c))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlal_u16(a, b, vdup_n_u16(c))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
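+// Illustrative usage sketch, not generator output (assumes
+// `core::arch::aarch64::*` and NEON): the `*_n_*` variants splat the
+// scalar with `vdup` and defer to the vector intrinsic, so
+// `vmlal_n_u16(a, b, c)` equals `vmlal_u16(a, b, vdup_n_u16(c))`.
+//
+// ```
+// unsafe fn scaled_acc(acc: uint32x4_t, x: uint16x4_t, k: u16) -> uint32x4_t {
+//     // acc[i] + (x[i] as u32) * (k as u32)
+//     vmlal_n_u16(acc, x, k)
+// }
+// ```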
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { - transmute(vld4_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlal_u32(a, b, vdup_n_u32(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32) +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { - transmute(vld4_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + simd_add(a, vmull_s8(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8) +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { - transmute(vld4q_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = 
"arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + simd_add(a, vmull_s16(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16) +#[doc = "Signed multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { - transmute(vld4q_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + simd_add(a, vmull_s32(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32) +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { - transmute(vld4q_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + simd_add(a, vmull_u8(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8) +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { - transmute(vld4_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + simd_add(a, vmull_u16(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16) +#[doc = "Unsigned multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { - transmute(vld4_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + simd_add(a, vmull_u32(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8) +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { - transmute(vld4q_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16) +#[doc = "Floating-point multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { - transmute(vld4q_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { - transmute(vld4_s64(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// 
Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { - transmute(vld4_s64(transmute(a))) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_laneq_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x4_t, +) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0i8")] - fn vld4_f32_(ptr: *const i8, size: i32) -> float32x2x4_t; - } -vld4_f32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x2_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_f32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32) +#[doc = 
"Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v2f32.p0v2f32")] - fn vld4_f32_(ptr: *const float32x2_t) -> float32x2x4_t; - } -vld4_f32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_laneq_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, +) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_f32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0i8")] - fn vld4q_f32_(ptr: *const i8, size: i32) -> float32x4x4_t; - } -vld4q_f32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmls_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] 
#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4.v4f32.p0v4f32")] - fn vld4q_f32_(ptr: *const float32x4_t) -> float32x4x4_t; - } -vld4q_f32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_lane_u16( + a: uint16x4_t, + b: uint16x4_t, + c: uint16x4_t, +) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 2); + vmls_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0i8")] - fn vld4_dup_s8_(ptr: *const i8, size: i32) -> int8x8x4_t; - } -vld4_dup_s8_(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_laneq_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x8_t, +) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmls_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { - 
#[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v8i8.p0i8")] - fn vld4_dup_s8_(ptr: *const i8) -> int8x8x4_t; - } -vld4_dup_s8_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_laneq_u16( + a: uint16x4_t, + b: uint16x4_t, + c: uint16x8_t, +) -> uint16x4_t { + static_assert_uimm_bits!(LANE, 3); + vmls_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0i8")] - fn vld4_dup_s16_(ptr: *const i8, size: i32) -> int16x4x4_t; - } -vld4_dup_s16_(a as *const i8, 2) -} - -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x4_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), 
link_name = "llvm.aarch64.neon.ld4r.v4i16.p0i16")] - fn vld4_dup_s16_(ptr: *const i16) -> int16x4x4_t; - } -vld4_dup_s16_(a as _) -} - -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_lane_u16( + a: uint16x8_t, + b: uint16x8_t, + c: uint16x4_t, +) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0i8")] - fn vld4_dup_s32_(ptr: *const i8, size: i32) -> int32x2x4_t; - } -vld4_dup_s32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_laneq_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, +) -> int16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlsq_s16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vmlsq_laneq_u16( + a: uint16x8_t, + b: uint16x8_t, + c: uint16x8_t, +) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + vmlsq_u16( + a, + b, + simd_shuffle!( + c, + c, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v2i32.p0i32")] - fn vld4_dup_s32_(ptr: *const i32) -> int32x2x4_t; - } -vld4_dup_s32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_lane_u32( + a: uint32x2_t, + b: uint32x2_t, + c: uint32x2_t, +) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn 
vld4q_dup_s8(a: *const i8) -> int8x16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0i8")] - fn vld4q_dup_s8_(ptr: *const i8, size: i32) -> int8x16x4_t; - } -vld4q_dup_s8_(a as *const i8, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_laneq_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x4_t, +) -> int32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v16i8.p0i8")] - fn vld4q_dup_s8_(ptr: *const i8) -> int8x16x4_t; - } -vld4q_dup_s8_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_laneq_u32( + a: uint32x2_t, + b: uint32x2_t, + c: uint32x4_t, +) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 2); + vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0i8")] - fn vld4q_dup_s16_(ptr: *const i8, size: i32) -> int16x8x4_t; - } -vld4q_dup_s16_(a as *const i8, 2) 
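// Sketch, assuming an aarch64 target and a LANE chosen for illustration:
// the laneq integer variants above take a 128-bit `c` and subtract
// b * c[LANE] from `a` lane-wise:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let data = [1i32, 2, 3, 4];
//         let a = vdup_n_s32(100);
//         let b = vdup_n_s32(5);
//         let c = vld1q_s32(data.as_ptr());
//         let r = vmls_laneq_s32::<3>(a, b, c);
//         // per lane: 100 - 5 * c[3] = 100 - 20 = 80
//         assert_eq!(vget_lane_s32::<0>(r), 80);
//     }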
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x2_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v8i16.p0i16")] - fn vld4q_dup_s16_(ptr: *const i16) -> int16x8x4_t; - } -vld4q_dup_s16_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_lane_u32( + a: uint32x4_t, + b: uint32x4_t, + c: uint32x2_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + vmlsq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0i8")] - fn vld4q_dup_s32_(ptr: *const i8, size: i32) -> int32x4x4_t; - } -vld4q_dup_s32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( 
+ all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_laneq_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_s32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v4i32.p0i32")] - fn vld4q_dup_s32_(ptr: *const i32) -> int32x4x4_t; - } -vld4q_dup_s32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_laneq_u32( + a: uint32x4_t, + b: uint32x4_t, + c: uint32x4_t, +) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsq_u32( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0i8")] - fn vld4_dup_s64_(ptr: *const i8, size: i32) -> int64x1x4_t; - } -vld4_dup_s64_(a as *const i8, 8) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmls_f32(a, b, vdup_n_f32(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v1i64.p0i64")] - fn vld4_dup_s64_(ptr: *const i64) -> int64x1x4_t; - } -vld4_dup_s64_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlsq_f32(a, b, vdupq_n_f32(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { - transmute(vld4_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmls_s16(a, b, vdup_n_s16(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { - transmute(vld4_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlsq_s16(a, b, vdupq_n_s16(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { - transmute(vld4_dup_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmls_u16(a, b, vdup_n_u16(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_dup_u8(a: *const u8) -> 
uint8x16x4_t { - transmute(vld4q_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlsq_u16(a, b, vdupq_n_u16(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { - transmute(vld4q_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmls_s32(a, b, vdup_n_s32(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { - transmute(vld4q_dup_s32(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlsq_s32(a, b, vdupq_n_s32(c)) } -/// Load 
single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { - transmute(vld4_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmls_u32(a, b, vdup_n_u32(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16) +#[doc = "Vector multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { - transmute(vld4_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlsq_u32(a, b, vdupq_n_u32(c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { - transmute(vld4q_dup_s8(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { - transmute(vld4q_dup_s16(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t { - transmute(vld4_dup_s64(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4r))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { - transmute(vld4_dup_s64(transmute(a))) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0i8")] - fn vld4_dup_f32_(ptr: *const i8, size: i32) -> float32x2x4_t; - } -vld4_dup_f32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v2f32.p0f32")] - fn vld4_dup_f32_(ptr: *const f32) -> float32x2x4_t; - } -vld4_dup_f32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0i8")] - fn vld4q_dup_f32_(ptr: *const i8, size: i32) -> float32x4x4_t; - } -vld4q_dup_f32_(a as *const i8, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_sub(a, simd_mul(b, c)) } -/// Load single 4-element structure and replicate to all lanes of four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4r))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { - 
#[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4r.v4f32.p0f32")] - fn vld4q_dup_f32_(ptr: *const f32) -> float32x4x4_t; - } -vld4q_dup_f32_(a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0i8")] - fn vld4_lane_s8_(ptr: *const i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i32, size: i32) -> int8x8x4_t; - } -vld4_lane_s8_(a as _, b.0, b.1, b.2, b.3, LANE, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s8(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0i8")] - fn vld4_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *const i8) -> int8x8x4_t; - } -vld4_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) 
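// NOTE: an illustrative usage sketch (editorial, not emitted by stdarch-gen2).
// vld4_lane_s8 de-interleaves: four consecutive i8 values are loaded from the
// pointer, one into lane LANE of each of the four vectors, and all other lanes
// pass through from `b`. Values below are hypothetical; assumes
// core::arch::aarch64 on an AArch64 target.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let data: [i8; 4] = [1, 2, 3, 4];
//         let zero = vdup_n_s8(0);
//         let src = int8x8x4_t(zero, zero, zero, zero);
//         // Load data[0..4] into lane 0 of r.0, r.1, r.2, r.3 respectively.
//         let r = vld4_lane_s8::<0>(data.as_ptr(), src);
//         assert_eq!(vget_lane_s8::<0>(r.0), 1);
//         assert_eq!(vget_lane_s8::<0>(r.3), 4);
//     }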
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0i8")] - fn vld4_lane_s16_(ptr: *const i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i32, size: i32) -> int16x4x4_t; - } -vld4_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) -} - -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16) -#[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s16(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0i8")] - fn vld4_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i64, ptr: *const i8) -> int16x4x4_t; - } -vld4_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32) +#[doc = "Multiply-subtract from accumulator"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, 
assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0i8")] - fn vld4_lane_s32_(ptr: *const i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i32, size: i32) -> int32x2x4_t; - } -vld4_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mls) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_sub(a, simd_mul(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_s32(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0i8")] - fn vld4_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i64, ptr: *const i8) -> int32x2x4_t; - } -vld4_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_lane_s16( + a: int32x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4, LANE = 
0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_laneq_s16( + a: int32x4_t, + b: int16x4_t, + c: int16x8_t, +) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0i8")] - fn vld4q_lane_s16_(ptr: *const i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i32, size: i32) -> int16x8x4_t; - } -vld4q_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) + vmlsl_s16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0i8")] - fn vld4q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i64, ptr: *const i8) -> int16x8x4_t; - } -vld4q_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_lane_s32( + a: int64x2_t, + b: int32x2_t, + c: int32x2_t, +) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = 
"arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_laneq_s32( + a: int64x2_t, + b: int32x2_t, + c: int32x4_t, +) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0i8")] - fn vld4q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i32, size: i32) -> int32x4x4_t; - } -vld4q_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) + vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_lane_u16( + a: uint32x4_t, + b: uint16x4_t, + c: uint16x4_t, +) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0i8")] - fn vld4q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i64, ptr: *const i8) -> int32x4x4_t; - } -vld4q_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + vmlsl_u16( + a, + b, + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon 
intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlsl, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlsl_laneq_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+    vmlsl_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umlsl, LANE = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmlsl_lane_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }

-/// Load multiple 4-element structures to four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
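// NOTE: an illustrative usage sketch (editorial, not emitted by stdarch-gen2).
// For the by-lane widening multiply-subtract forms above, each output lane is
// `a[i] - (b[i] * c[LANE])`, computed at the doubled element width. Values
// below are hypothetical; assumes core::arch::aarch64 on an AArch64 target.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let a = vdupq_n_u64(1000);
//         let b = vdup_n_u32(7);
//         let vals: [u32; 2] = [5, 4];
//         let c = vld1_u32(vals.as_ptr());
//         // Every u64 lane: 1000 - 7 * c[1] = 1000 - 28 = 972.
//         let r = vmlsl_lane_u32::<1>(a, b, c);
//         assert_eq!(vgetq_lane_u64::<0>(r), 972);
//     }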
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_lane_u32(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - transmute(vld4_lane_s32::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl, LANE = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_laneq_u32( + a: uint64x2_t, + b: uint32x2_t, + c: uint32x4_t, +) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 2); + vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_lane_u16(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlsl_s16(a, b, vdup_n_s16(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] 
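// NOTE: an illustrative usage sketch (editorial, not emitted by stdarch-gen2).
// The `laneq` form differs from the `lane` form only in that the lane is taken
// from a 128-bit vector (four u32 lanes here, so LANE may be 0..=3). Values
// below are hypothetical; assumes core::arch::aarch64 on an AArch64 target.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let a = vdupq_n_u64(100);
//         let b = vdup_n_u32(3);
//         let vals: [u32; 4] = [9, 8, 7, 6];
//         let c = vld1q_u32(vals.as_ptr());
//         // Every u64 lane: 100 - 3 * c[2] = 100 - 21 = 79.
//         let r = vmlsl_laneq_u32::<2>(a, b, c);
//         assert_eq!(vgetq_lane_u64::<0>(r), 79);
//     }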
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_lane_u32(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4q_lane_s32::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlsl_s32(a, b, vdup_n_s32(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4_lane_p8(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4_lane_s8::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlsl_u16(a, b, vdup_n_u16(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16) +#[doc = "Vector widening multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] 
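// NOTE: an illustrative usage sketch (editorial, not emitted by stdarch-gen2).
// The `_n` forms broadcast the scalar `c` with vdup before the widening
// multiply-subtract, so each i32 lane is `a[i] - (b[i] as i32 * c as i32)`.
// Values below are hypothetical; assumes core::arch::aarch64 on AArch64.
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let a = vdupq_n_s32(50);
//         let b = vdup_n_s16(-4);
//         // Every i32 lane: 50 - (-4 * 3) = 62.
//         let r = vmlsl_n_s16(a, b, 3);
//         assert_eq!(vgetq_lane_s32::<0>(r), 62);
//     }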
-pub unsafe fn vld4_lane_p16(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t { - static_assert_uimm_bits!(LANE, 2); - transmute(vld4_lane_s16::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlsl_u32(a, b, vdup_n_u32(c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16) +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t { - static_assert_uimm_bits!(LANE, 3); - transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + simd_sub(a, vmull_s8(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32) +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0i8")] - fn vld4_lane_f32_(ptr: *const i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i32, size: i32) -> float32x2x4_t; - } -vld4_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + simd_sub(a, vmull_s16(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32) +#[doc = "Signed multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0i8")] - fn vld4_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i64, ptr: *const i8) -> float32x2x4_t; - } -vld4_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(smlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + simd_sub(a, vmull_s32(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32) +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0i8")] - fn vld4q_lane_f32_(ptr: *const i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, n: i32, size: i32) -> float32x4x4_t; - } -vld4q_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + simd_sub(a, vmull_u8(b, c)) } -/// Load multiple 4-element structures to four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32) +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(ld4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0i8")] - fn vld4q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, n: i64, ptr: *const i8) -> float32x4x4_t; - } -vld4q_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + simd_sub(a, vmull_u16(b, c)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8) +#[doc = "Unsigned multiply-subtract long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_s8(a: *mut i8, b: int8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(umlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + simd_sub(a, vmull_u32(b, c)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_s16(a: *mut i16, b: int16x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_s32(a: *mut i32, b: int32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 
0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_s64(a: *mut i64, b: int64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_s8(a: *mut i8, b: int8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_s16(a: *mut i16, b: int16x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 1); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32) +#[doc = "Floating-point multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fmul, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800"))] -pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) +} + +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { - static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { - static_assert!(LANE == 0); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { - static_assert_uimm_bits!(LANE, 4); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 3); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); -} - -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32) + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) 
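    // The shuffle just above broadcasts lane LANE of `b` into all eight i16
    // positions, so the surrounding `simd_mul` scales every element of `a` by
    // the same scalar; e.g. a (hypothetical) call `vmulq_laneq_s16::<3>(a, b)`
    // multiplies each element of `a` by `b[3]`.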
+} + +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); + simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_u64(a: *mut u64, b: uint64x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
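// The lane index for each of these intrinsics is validated at compile time:
// `static_assert_uimm_bits!(LANE, N)` only accepts a const LANE that fits in
// N unsigned bits. `vmul_laneq_s32` above uses N = 2, so lanes 0..=3 of the
// 128-bit source vector are accepted and a (hypothetical) call such as
// `vmul_laneq_s32::<4>(a, b)` is rejected at compile time.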
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_p8(a: *mut p8, b: poly8x8_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { static_assert_uimm_bits!(LANE, 3); - *a = simd_extract!(b, LANE as u32); + simd_mul( + a, + simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), + ) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_p16(a: *mut p16, b: poly16x4_t) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_mul( + a, + simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ), + ) +} + +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    *a = simd_extract!(b, LANE as u32);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }

-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    *a = simd_extract!(b, LANE as u32);
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mul, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }

-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    *a = simd_extract!(b, LANE as u32);
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
+    simd_mul(a, vdup_n_f32(b))
 }

-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1_t) {
-    static_assert!(LANE == 0);
-    *a = simd_extract!(b, LANE as u32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
+    simd_mul(a, vdupq_n_f32(b))
 }

-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mul)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    simd_mul(a, vdup_n_s16(b))
 }

-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_lane_f32(a: *mut f32, b: float32x2_t) { - static_assert_uimm_bits!(LANE, 1); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + simd_mul(a, vdupq_n_s16(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { - static_assert_uimm_bits!(LANE, 2); - *a = simd_extract!(b, LANE as u32); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + simd_mul(a, vdup_n_s32(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v8i8")] - fn vst1_s8_x2_(ptr: *mut i8, a: int8x8_t, b: int8x8_t); - } -vst1_s8_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + simd_mul(a, vdupq_n_s32(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v8i8.p0i8")] - fn vst1_s8_x2_(a: int8x8_t, b: int8x8_t, ptr: *mut i8); - } -vst1_s8_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { + simd_mul(a, vdup_n_u16(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v4i16")] - fn vst1_s16_x2_(ptr: *mut i16, a: int16x4_t, b: int16x4_t); - } -vst1_s16_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { + simd_mul(a, vdupq_n_u16(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v4i16.p0i16")] - fn vst1_s16_x2_(a: int16x4_t, b: int16x4_t, ptr: *mut i16); - } -vst1_s16_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { + simd_mul(a, vdup_n_u32(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2) +#[doc = "Vector multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v2i32")] - fn vst1_s32_x2_(ptr: *mut i32, a: int32x2_t, b: int32x2_t); - } -vst1_s32_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { + simd_mul(a, vdupq_n_u32(b)) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2) +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"] +#[doc = "## Safety"] +#[doc 
= " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(pmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v2i32.p0i32")] - fn vst1_s32_x2_(a: int32x2_t, b: int32x2_t, ptr: *mut i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v8i8" + )] + fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; } -vst1_s32_x2_(b.0, b.1, a) + _vmul_p8(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2) +#[doc = "Polynomial multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(pmul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v1i64")] - fn vst1_s64_x2_(ptr: *mut i64, a: int64x1_t, b: int64x1_t); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.pmul.v16i8" + )] + fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; } -vst1_s64_x2_(a, b.0, b.1) + _vmulq_p8(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v1i64.p0i64")] - fn vst1_s64_x2_(a: int64x1_t, b: int64x1_t, ptr: *mut i64); - } -vst1_s64_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v16i8")] - fn vst1q_s8_x2_(ptr: *mut i8, a: int8x16_t, b: int8x16_t); - } -vst1q_s8_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v16i8.p0i8")] - fn vst1q_s8_x2_(a: int8x16_t, b: int8x16_t, ptr: *mut i8); - } -vst1q_s8_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = 
"1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v8i16")] - fn vst1q_s16_x2_(ptr: *mut i16, a: int16x8_t, b: int16x8_t); - } -vst1q_s16_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v8i16.p0i16")] - fn vst1q_s16_x2_(a: int16x8_t, b: int16x8_t, ptr: *mut i16); - } -vst1q_s16_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = 
"neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v4i32")] - fn vst1q_s32_x2_(ptr: *mut i32, a: int32x4_t, b: int32x4_t); - } -vst1q_s32_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v4i32.p0i32")] - fn vst1q_s32_x2_(a: int32x4_t, b: int32x4_t, ptr: *mut i32); - } -vst1q_s32_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v2i64")] - fn vst1q_s64_x2_(ptr: *mut i64, a: int64x2_t, b: int64x2_t); - } -vst1q_s64_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v2i64.p0i64")] - fn vst1q_s64_x2_(a: int64x2_t, b: int64x2_t, ptr: *mut i64); - } -vst1q_s64_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v8i8")] - fn vst1_s8_x3_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); - } -vst1_s8_x3_(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"] +#[doc = "## Safety"] +#[doc 
= " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v8i8.p0i8")] - fn vst1_s8_x3_(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); - } -vst1_s8_x3_(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3) +#[doc = "Multiply"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v4i16")] - fn vst1_s16_x3_(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); - } -vst1_s16_x3_(a, b.0, b.1, b.2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(mul) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_mul(a, b) } -/// Store multiple single-element structures from one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3) +#[doc = "Vector long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v4i16.p0i16")] - fn vst1_s16_x3_(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); - } -vst1_s16_x3_(b.0, b.1, b.2, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v2i32")]
-        fn vst1_s32_x3_(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t);
-    }
-vst1_s32_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_s16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v2i32.p0i32")]
-        fn vst1_s32_x3_(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32);
-    }
-vst1_s32_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v1i64")]
-        fn vst1_s64_x3_(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t);
-    }
-vst1_s64_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v1i64.p0i64")]
-        fn vst1_s64_x3_(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64);
-    }
-vst1_s64_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v16i8")]
-        fn vst1q_s8_x3_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t);
-    }
-vst1q_s8_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_u16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v16i8.p0i8")]
-        fn vst1q_s8_x3_(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8);
-    }
-vst1q_s8_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v8i16")]
-        fn vst1q_s16_x3_(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t);
-    }
-vst1q_s16_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v8i16.p0i16")]
-        fn vst1q_s16_x3_(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16);
-    }
-vst1q_s16_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vmull_s16(a, vdup_n_s16(b))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v4i32")]
-        fn vst1q_s32_x3_(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t);
-    }
-vst1q_s32_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vmull_s32(a, vdup_n_s32(b))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v4i32.p0i32")]
-        fn vst1q_s32_x3_(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32);
-    }
-vst1q_s32_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
+    vmull_u16(a, vdup_n_u16(b))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v2i64")]
-        fn vst1q_s64_x3_(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t);
-    }
-vst1q_s64_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
+    vmull_u32(a, vdup_n_u32(b))
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(pmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v2i64.p0i64")]
-        fn vst1q_s64_x3_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.pmull.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")]
+        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
     }
-vst1q_s64_x3_(b.0, b.1, b.2, a)
+    _vmull_p8(a, b)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v8i8")]
-        fn vst1_s8_x4_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smull.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
+        fn _vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
     }
-vst1_s8_x4_(a, b.0, b.1, b.2, b.3)
+    _vmull_s16(a, b)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v8i8.p0i8")]
-        fn vst1_s8_x4_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smull.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")]
+        fn _vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
     }
-vst1_s8_x4_(b.0, b.1, b.2, b.3, a)
+    _vmull_s32(a, b)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v4i16")]
-        fn vst1_s16_x4_(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smull.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
+        fn _vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
     }
-vst1_s16_x4_(a, b.0, b.1, b.2, b.3)
+    _vmull_s8(a, b)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v4i16.p0i16")]
-        fn vst1_s16_x4_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umull.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")]
+        fn _vmull_u8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
     }
-vst1_s16_x4_(b.0, b.1, b.2, b.3, a)
+    _vmull_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v2i32")]
-        fn vst1_s32_x4_(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umull.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")]
+        fn _vmull_u16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
     }
-vst1_s32_x4_(a, b.0, b.1, b.2, b.3)
+    _vmull_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v2i32.p0i32")]
-        fn vst1_s32_x4_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umull.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")]
+        fn _vmull_u32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
     }
-vst1_s32_x4_(b.0, b.1, b.2, b.3, a)
+    _vmull_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v1i64")]
-        fn vst1_s64_x4_(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t);
-    }
-vst1_s64_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v1i64.p0i64")]
-        fn vst1_s64_x4_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64);
-    }
-vst1_s64_x4_(b.0, b.1, b.2, b.3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v16i8")]
-        fn vst1q_s8_x4_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t);
-    }
-vst1q_s8_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(neg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v16i8.p0i8")]
-        fn vst1q_s8_x4_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
-    }
-vst1q_s8_x4_(b.0, b.1, b.2, b.3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(neg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v8i16")]
-        fn vst1q_s16_x4_(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t);
-    }
-vst1q_s16_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(neg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v8i16.p0i16")]
-        fn vst1q_s16_x4_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16);
-    }
-vst1q_s16_x4_(b.0, b.1, b.2, b.3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(neg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v4i32")]
-        fn vst1q_s32_x4_(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t);
-    }
-vst1q_s32_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(neg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v4i32.p0i32")]
-        fn vst1q_s32_x4_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32);
-    }
-vst1q_s32_x4_(b.0, b.1, b.2, b.3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(neg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t {
+    simd_neg(a)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v2i64")]
-        fn vst1q_s64_x4_(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t);
-    }
-vst1q_s64_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures from one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v2i64.p0i64")]
-        fn vst1q_s64_x4_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64);
-    }
-vst1q_s64_x4_(b.0, b.1, b.2, b.3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) {
-    vst1_s8_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) {
-    vst1_s16_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) {
-    vst1_s32_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) {
-    vst1_s64_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) {
-    vst1q_s8_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) {
-    vst1q_s16_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) {
-    vst1q_s32_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) {
-    vst1q_s64_x2(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) {
-    vst1_s8_x3(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) {
-    vst1_s16_x3(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) {
-    vst1_s32_x3(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) {
-    vst1_s64_x3(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_or(a, b)
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) {
-    vst1q_s8_x3(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_or(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3) +#[doc = "Vector bitwise or (immediate, inclusive)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { - vst1q_s16_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(orr) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_or(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3) +#[doc = "Floating-point add pairwise"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { - vst1q_s32_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(faddp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.faddp.v2f32" + )] + fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vpadd_f32(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { - vst1q_s64_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] + fn _vqabs_s8(a: int8x8_t) -> int8x8_t; + } + _vqabs_s8(a) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { - vst1_s8_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] + fn _vqabsq_s8(a: int8x16_t) -> int8x16_t; + } + _vqabsq_s8(a) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4) +#[doc = "Signed saturating Absolute 
value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { - vst1_s16_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] + fn _vqabs_s16(a: int16x4_t) -> int16x4_t; + } + _vqabs_s16(a) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { - vst1_s32_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] + fn _vqabsq_s16(a: int16x8_t) -> int16x8_t; + } + _vqabsq_s16(a) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { - vst1_s64_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] + fn _vqabs_s32(a: int32x2_t) -> int32x2_t; + } + _vqabs_s32(a) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4) +#[doc = "Signed saturating Absolute value"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { - vst1q_s8_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqabs) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqabs.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] + fn _vqabsq_s32(a: int32x4_t) -> int32x4_t; + } + _vqabsq_s32(a) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { - vst1q_s16_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")] + fn _vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vqadd_s8(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { - vst1q_s32_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")] + fn _vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vqaddq_s8(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { - vst1q_s64_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")] + fn _vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vqadd_s16(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { - vst1_s8_x2(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")] + fn _vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vqaddq_s16(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] 
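(Illustrative aside, not part of the generated diff: the `vqadd_*` intrinsics added above clamp to the lane type's bounds instead of wrapping. A minimal sketch, assuming an AArch64 target with NEON; the function name `saturating_add_demo` is hypothetical.)
```
// Hypothetical usage sketch for the saturating-add intrinsics added above.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn saturating_add_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_s16(i16::MAX); // [32767; 4]
    let b = vdup_n_s16(1);        // [1; 4]
    // vqadd_s16 saturates at i16::MAX instead of wrapping to i16::MIN.
    let r = vqadd_s16(a, b);
    assert_eq!(vget_lane_s16::<0>(r), i16::MAX);
}
```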
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { - vst1_s8_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")] + fn _vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vqadd_s32(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { - vst1_s8_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")] + fn _vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vqaddq_s32(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { - vst1q_s8_x2(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")] + fn _vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + _vqadd_s64(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { - vst1q_s8_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqadd.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")] + fn _vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + _vqaddq_s64(a, b) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
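(Likewise for the unsigned forms that follow: the LLVM declarations take signed vector types, so the generated bodies reinterpret via the internal `as_signed`/`as_unsigned` helpers, but the observable semantics are plain unsigned saturation. A hedged sketch under the same assumptions; `uqadd_demo` is a hypothetical name.)
```
// Hypothetical usage sketch for vqadd_u8: unsigned saturating add.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn uqadd_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_u8(250);
    let b = vdup_n_u8(10);
    // 250 + 10 clamps to 255 rather than wrapping to 4.
    let r = vqadd_u8(a, b);
    assert_eq!(vget_lane_u8::<0>(r), u8::MAX);
}
```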
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { - vst1q_s8_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")] + fn _vqadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vqadd_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { - vst1_s16_x2(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")] + fn _vqaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vqaddq_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { - vst1_s16_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")] + fn _vqadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vqadd_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { - vst1_s16_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")] + fn _vqaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vqaddq_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { - vst1q_s16_x2(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")] + fn _vqadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vqadd_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { - vst1q_s16_x3(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")] + fn _vqaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vqaddq_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { - vst1q_s16_x4(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")] + fn _vqadd_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + _vqadd_u64(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2) +#[doc = "Saturating add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { - vst1_s64_x2(transmute(a), transmute(b)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqadd.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")] + fn _vqaddq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + _vqaddq_u64(a.as_signed(), b.as_signed()).as_unsigned() } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3) +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { - vst1_s64_x3(transmute(a), transmute(b)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlal_lane_s16<const N: i32>( + a: int32x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4) +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { - vst1_s64_x4(transmute(a), transmute(b)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlal_lane_s32<const N: i32>( + a: int64x2_t, + b: int32x2_t, + c: int32x2_t, +) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2) +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"),
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { - vst1q_s64_x2(transmute(a), transmute(b)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqaddq_s32(a, vqdmull_n_s16(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3) +#[doc = "Vector widening saturating doubling multiply accumulate with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { - vst1q_s64_x3(transmute(a), transmute(b)) -} +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_n_s32(b, c)) +} -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st1))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { - vst1q_s64_x4(transmute(a), transmute(b)) +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqaddq_s32(a, vqdmull_s16(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2) +#[doc = "Signed saturating doubling multiply-add long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v2f32")] - fn vst1_f32_x2_(ptr: *mut f32, a: float32x2_t, b: float32x2_t); - } -vst1_f32_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlal) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqaddq_s64(a, vqdmull_s32(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2) +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v2f32.p0f32")] - fn vst1_f32_x2_(a: float32x2_t, b: float32x2_t, ptr: *mut f32); - } -vst1_f32_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl, N = 2) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vqdmlsl_lane_s16( + a: int32x4_t, + b: int16x4_t, + c: int16x4_t, +) -> int32x4_t { + static_assert_uimm_bits!(N, 2); + vqsubq_s32(a, vqdmull_lane_s16::(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2) +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst1))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v4f32")] - fn vst1q_f32_x2_(ptr: *mut f32, a: float32x4_t, b: float32x4_t); - } -vst1q_f32_x2_(a, b.0, b.1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl, N = 1) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlsl_lane_s32( + a: int64x2_t, + b: int32x2_t, + c: int32x2_t, +) -> int64x2_t { + static_assert_uimm_bits!(N, 1); + vqsubq_s64(a, vqdmull_lane_s32::(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2) +#[doc = "Vector widening saturating doubling multiply subtract with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x2.v4f32.p0f32")] - fn vst1q_f32_x2_(a: float32x4_t, b: float32x4_t, ptr: *mut f32); - } -vst1q_f32_x2_(b.0, b.1, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmlsl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_n_s16(b, c)) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's 
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v2f32")]
-        fn vst1_f32_x3_(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t);
-    }
-vst1_f32_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_n_s32(b, c))
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v2f32.p0f32")]
-        fn vst1_f32_x3_(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32);
-    }
-vst1_f32_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_s16(b, c))
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v4f32")]
-        fn vst1q_f32_x3_(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t);
-    }
-vst1q_f32_x3_(a, b.0, b.1, b.2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_s32(b, c))
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x3.v4f32.p0f32")]
-        fn vst1q_f32_x3_(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32);
-    }
-vst1q_f32_x3_(b.0, b.1, b.2, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32)))
 }
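// vqdmulh on i16 lanes is effectively a Q15 fixed-point multiply:
// (2*a*b) >> 16, saturated. A sketch with hypothetical Q15 values, using
// LANE to pick the multiplier from a full 128-bit vector:
// unsafe {
//     let half = vdup_n_s16(0x4000); // 0.5 in Q15
//     let b = vdupq_n_s16(0x2000);   // 0.25 in Q15
//     let r = vqdmulh_laneq_s16::<0>(half, b); // 0.5 * 0.25 = 0.125
//     assert_eq!(vget_lane_s16::<0>(r), 0x1000);
// }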
-        fn vst1_f32_x4_(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t);
-    }
-vst1_f32_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v2f32.p0f32")]
-        fn vst1_f32_x4_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut f32);
-    }
-vst1_f32_x4_(b.0, b.1, b.2, b.3, a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
 }
-/// Store multiple single-element structures to one, two, three, or four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst1))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v4f32")]
-        fn vst1q_f32_x4_(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t);
-    }
-vst1q_f32_x4_(a, b.0, b.1, b.2, b.3)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 2); + vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) } -/// Store multiple single-element structures to one, two, three, or four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4) +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st1))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st1x4.v4f32.p0f32")] - fn vst1q_f32_x4_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut f32); - } -vst1q_f32_x4_(b.0, b.1, b.2, b.3, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + let b: int16x4_t = vdup_n_s16(b); + vqdmulh_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8) +#[doc = "Vector saturating doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i8")] - fn vst2_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); - } -vst2_s8_(a as _, b.0, b.1, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i8")]
-        fn vst2_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32);
-    }
-vst2_s8_(a as _, b.0, b.1, 1)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    let b: int16x8_t = vdupq_n_s16(b);
+    vqdmulhq_s16(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v8i8.p0i8")]
-        fn vst2_s8_(a: int8x8_t, b: int8x8_t, ptr: *mut i8);
-    }
-vst2_s8_(b.0, b.1, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmulh_s32(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i16")]
-        fn vst2_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32);
-    }
-vst2_s16_(a as _, b.0, b.1, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    let b: int32x4_t = vdupq_n_s32(b);
+    vqdmulhq_s32(a, b)
 }
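// The "saturating" part matters at the type extremes: doubling the product
// of i16::MIN with itself would overflow the intermediate, so the result
// clamps rather than wrapping (a sketch, hypothetical values):
// unsafe {
//     let a = vdup_n_s16(i16::MIN);
//     let r = vqdmulh_n_s16(a, i16::MIN);
//     assert_eq!(vget_lane_s16::<0>(r), i16::MAX);
// }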
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v4i16.p0i8")]
-        fn vst2_s16_(a: int16x4_t, b: int16x4_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
+        )]
+        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-vst2_s16_(b.0, b.1, a as _)
+    _vqdmulh_s16(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2i32")]
-        fn vst2_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
+        )]
+        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-vst2_s32_(a as _, b.0, b.1, 4)
+    _vqdmulhq_s16(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2))]
"neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v2i32.p0i8")] - fn vst2_s32_(a: int32x2_t, b: int32x2_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v2i32" + )] + fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vst2_s32_(b.0, b.1, a as _) + _vqdmulh_s32(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8) +#[doc = "Signed saturating doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v16i8")] - fn vst2q_s8_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmulh.v4i32" + )] + fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vst2q_s8_(a as _, b.0, b.1, 1) + _vqdmulhq_s32(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8) +#[doc = "Vector saturating doubling long multiply by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - 
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v16i8.p0i8")]
-        fn vst2q_s8_(a: int8x16_t, b: int8x16_t, ptr: *mut i8);
-    }
-vst2q_s8_(b.0, b.1, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i16")]
-        fn vst2q_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32);
-    }
-vst2q_s16_(a as _, b.0, b.1, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull, N = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }
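// Lane-selected widening multiply sketch (hypothetical values): N is the
// lane of `b` broadcast to all positions before the doubling multiply,
// validated at compile time by static_assert_uimm_bits!:
// unsafe {
//     let a = vdup_n_s16(10);
//     let b = vld1_s16([7i16, 8, 9, 10].as_ptr());
//     let r = vqdmull_lane_s16::<1>(a, b); // each i32 lane: 2 * 10 * 8 = 160
//     assert_eq!(vgetq_lane_s32::<0>(r), 160);
// }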
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v8i16.p0i8")]
-        fn vst2q_s16_(a: int16x8_t, b: int16x8_t, ptr: *mut i8);
-    }
-vst2q_s16_(b.0, b.1, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vqdmull_s16(a, vdup_n_s16(b))
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i32")]
-        fn vst2q_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32);
-    }
-vst2q_s32_(a as _, b.0, b.1, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vqdmull_s32(a, vdup_n_s32(b))
 }
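// The widening is what makes the doubling safe for almost all inputs: i16
// products land in i32 lanes (and i32 products in i64 lanes for the _s32
// form). A sketch with hypothetical values:
// unsafe {
//     let a = vdup_n_s16(i16::MAX);
//     let r = vqdmull_n_s16(a, i16::MAX); // 2 * 32767 * 32767 fits in i32
//     assert_eq!(vgetq_lane_s32::<0>(r), 2_147_352_578);
// }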
"llvm.aarch64.neon.sqdmull.v4i32" + )] + fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t; } -vst2q_s32_(b.0, b.1, a as _) + _vqdmull_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64) +#[doc = "Signed saturating doubling multiply long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(nop))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqdmull) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v1i64")] - fn vst2_s64_(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqdmull.v2i64" + )] + fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t; } -vst2_s64_(a as _, b.0, b.1, 8) + _vqdmull_s32(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64) +#[doc = "Signed saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v1i64.p0i8")] - fn vst2_s64_(a: int64x1_t, b: int64x1_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtn.v8i8" + )] + fn _vqmovn_s16(a: int16x8_t) -> int8x8_t; } -vst2_s64_(b.0, b.1, a as _) + _vqmovn_s16(a) } -/// Store multiple 2-element structures 
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) {
-    vst2_s8(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
+        )]
+        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
+    }
+    _vqmovn_s32(a)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) {
-    vst2_s16(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
+        )]
+        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
+    }
+    _vqmovn_s64(a)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)
+#[doc = "Unsigned saturating extract narrow"]
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { - vst2_s32(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v8i8" + )] + fn _vqmovn_u16(a: int16x8_t) -> int8x8_t; + } + _vqmovn_u16(a.as_signed()).as_unsigned() } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8) +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { - vst2q_s8(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v4i16" + )] + fn _vqmovn_u32(a: int32x4_t) -> int16x4_t; + } + _vqmovn_u32(a.as_signed()).as_unsigned() } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16) +#[doc = "Unsigned saturating extract narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"] +#[doc = "## Safety"] 
+#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { - vst2q_s16(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqxtn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqxtn.v2i32" + )] + fn _vqmovn_u64(a: int64x2_t) -> int32x2_t; + } + _vqmovn_u64(a.as_signed()).as_unsigned() } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { - vst2q_s32(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqxtun) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqxtun.v8i8" + )] + fn _vqmovun_s16(a: int16x8_t) -> int8x8_t; + } + _vqmovun_s16(a).as_unsigned() } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8) +#[doc = "Signed saturating extract unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) {
-    vst2_s8(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtun)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
+        )]
+        fn _vqmovun_s32(a: int32x4_t) -> int16x4_t;
+    }
+    _vqmovun_s32(a).as_unsigned()
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) {
-    vst2_s16(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtun)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
+        )]
+        fn _vqmovun_s64(a: int64x2_t) -> int32x2_t;
+    }
+    _vqmovun_s64(a).as_unsigned()
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
"aarch64", target_arch = "arm64ec")), assert_instr(st2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { - vst2q_s8(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + fn _vqneg_s8(a: int8x8_t) -> int8x8_t; + } + _vqneg_s8(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { - vst2q_s16(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] + fn _vqnegq_s8(a: int8x16_t) -> int8x16_t; + } + _vqnegq_s8(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { - vst2_s64(transmute(a), transmute(b)) -} - -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { - vst2_s64(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + fn _vqneg_s16(a: int16x4_t) -> int16x4_t; + } + _vqneg_s16(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqneg) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2f32")] - fn vst2_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqneg.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + fn _vqnegq_s16(a: int16x8_t) -> int16x8_t; } -vst2_f32_(a as _, b.0, b.1, 4) + _vqnegq_s16(a) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32) +#[doc = "Signed saturating negate"] +#[doc = "[Arm's 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v2f32.p0i8")]
-        fn vst2_f32_(a: float32x2_t, b: float32x2_t, ptr: *mut i8);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
+        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
     }
-vst2_f32_(b.0, b.1, a as _)
+    _vqneg_s32(a)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4f32")]
-        fn vst2q_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
+        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
     }
-vst2q_f32_(a as _, b.0, b.1, 4)
+    _vqnegq_s32(a)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
"arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2.v4f32.p0i8")] - fn vst2q_f32_(a: float32x4_t, b: float32x4_t, ptr: *mut i8); - } -vst2q_f32_(b.0, b.1, a as _) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(LANE, 2); + let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vqrdmulh_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8) +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s8(a: *mut i8, b: int8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i8")] - fn vst2_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); - } -vst2_lane_s8_(a as _, b.0, b.1, LANE, 1) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); + vqrdmulh_s32(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8) +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] 
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v8i8.p0i8")]
-        fn vst2_lane_s8_(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8);
-    }
-vst2_lane_s8_(b.0, b.1, LANE as i64, a as _)
+    let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulh_s16(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i16")]
-        fn vst2_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32);
-    }
-vst2_lane_s16_(a as _, b.0, b.1, LANE, 2)
+    let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
+    vqrdmulh_s32(a, b)
 }
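// Editor's illustration, not part of the generated patch: the `_laneq` forms
// differ from `_lane` only in taking the multiplier from a 128-bit vector,
// which widens the legal LANE range (0..=7 for i16 rather than 0..=3).
#[cfg(target_arch = "aarch64")]
mod laneq_sketch {
    use core::arch::aarch64::*;
    pub unsafe fn scale_by_table_lane(a: int16x4_t, table: int16x8_t) -> int16x4_t {
        vqrdmulh_laneq_s16::<7>(a, table) // lane 7 exists only on the q-sized source
    }
}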
since = "1.59.0")] -pub unsafe fn vst2_lane_s16(a: *mut i16, b: int16x4x2_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v4i16.p0i8")] - fn vst2_lane_s16_(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); - } -vst2_lane_s16_(b.0, b.1, LANE as i64, a as _) + let b: int16x8_t = simd_shuffle!( + b, + b, + [ + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32, + LANE as u32 + ] + ); + vqrdmulhq_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32) +#[doc = "Vector rounding saturating doubling multiply high by scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2i32")] - fn vst2_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); - } -vst2_lane_s32_(a as _, b.0, b.1, LANE, 4) -} - -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32) -#[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_s32(a: *mut i32, b: int32x2x2_t) { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh, LANE = 1) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v2i32.p0i8")] - fn vst2_lane_s32_(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); - } 
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2i32")]
-        fn vst2_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32);
-    }
-vst2_lane_s32_(a as _, b.0, b.1, LANE, 4)
-}
-
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)
-#[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v2i32.p0i8")]
-        fn vst2_lane_s32_(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8);
-    }
-vst2_lane_s32_(b.0, b.1, LANE as i64, a as _)
-}
-
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i16")]
-        fn vst2q_lane_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32);
-    }
-vst2q_lane_s16_(a as _, b.0, b.1, LANE, 2)
+    let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s32(a, b)
 }
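// Editor's note: a scalar model of the rounding doubling high-half multiply,
// useful as a test oracle for the vector forms above (illustration only). The
// lone saturating case is i16::MIN * i16::MIN, which must clamp to i16::MAX.
fn vqrdmulh_model(a: i16, b: i16) -> i16 {
    let wide = 2 * i64::from(a) * i64::from(b) + (1 << 15);
    (wide >> 16).clamp(i64::from(i16::MIN), i64::from(i16::MAX)) as i16
}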
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v8i16.p0i8")]
-        fn vst2q_lane_s16_(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8);
-    }
-vst2q_lane_s16_(b.0, b.1, LANE as i64, a as _)
-}
-
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i32")]
-        fn vst2q_lane_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32);
-    }
-vst2q_lane_s32_(a as _, b.0, b.1, LANE, 4)
+    let b: int16x8_t = simd_shuffle!(
+        b,
+        b,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmulhq_s16(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v4i32.p0i8")]
-        fn vst2q_lane_s32_(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8);
-    }
-vst2q_lane_s32_(b.0, b.1, LANE as i64, a as _)
+    let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s32(a, b)
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst2_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x2_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    vqrdmulh_s16(a, vdup_n_s16(b))
 }
-/// Store multiple 2-element structures from two registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)
+#[doc = "Vector 
saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_lane_u16(a: *mut u16, b: uint16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2_lane_s16::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + vqrdmulhq_s16(a, vdupq_n_s16(b)) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32) +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_lane_u32(a: *mut u32, b: uint32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - vst2_lane_s32::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + vqrdmulh_s32(a, vdup_n_s32(b)) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16) +#[doc = "Vector saturating rounding doubling multiply high with scalar"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_lane_u16(a: *mut u16, b: uint16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2q_lane_s16::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + vqrdmulhq_s32(a, vdupq_n_s32(b)) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_lane_u32(a: *mut u32, b: uint32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2q_lane_s32::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i16" + )] + fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vqrdmulh_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_lane_p8(a: *mut p8, b: poly8x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2_lane_s8::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v8i16" + )] + fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vqrdmulhq_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2_lane_p16(a: *mut p16, b: poly16x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - vst2_lane_s16::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v2i32" + )] + fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vqrdmulh_s32(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16) +#[doc = "Signed saturating rounding doubling multiply returning high half"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst2q_lane_p16(a: *mut p16, b: poly16x8x2_t) { - static_assert_uimm_bits!(LANE, 3); - vst2q_lane_s16::(transmute(a), transmute(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrdmulh) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrdmulh.v4i32" + )] + fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vqrdmulhq_s32(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2f32")] - fn vst2_lane_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i8" + )] + fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vst2_lane_f32_(a as _, b.0, b.1, LANE, 4) + _vqrshl_s8(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v2f32.p0i8")] - fn vst2_lane_f32_(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v16i8" + )] + fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vst2_lane_f32_(b.0, b.1, LANE as i64, a as _) + _vqrshlq_s8(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4f32")] - fn vst2q_lane_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i16" + )] + fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vst2q_lane_f32_(a as _, b.0, b.1, LANE, 4) + _vqrshl_s16(a, b) } -/// Store multiple 2-element structures from two registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st2, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst2q_lane_f32(a: *mut f32, b: float32x4x2_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st2lane.v4f32.p0i8")] - fn vst2q_lane_f32_(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v8i16" + )] + fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vst2q_lane_f32_(b.0, b.1, LANE as i64, a as _) + _vqrshlq_s16(a, b) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8i8")] - fn vst3_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i32" + )] + fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vst3_s8_(a as _, b.0, b.1, b.2, 1) + _vqrshl_s32(a, b) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v8i8.p0i8")] - fn vst3_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v4i32" + )] + fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vst3_s8_(b.0, b.1, b.2, a as _) + _vqrshlq_s32(a, b) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16) +#[doc = "Signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4i16")] - fn vst3_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v1i64" + )] + fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } -vst3_s16_(a as _, b.0, b.1, b.2, 2) + _vqrshl_s64(a, b) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16) +#[doc = "Signed saturating 
rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v4i16.p0i8")] - fn vst3_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshl.v2i64" + )] + fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; } -vst3_s16_(b.0, b.1, b.2, a as _) + _vqrshlq_s64(a, b) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v2i32")] - fn vst3_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i8" + )] + fn _vqrshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vst3_s32_(a as _, b.0, b.1, b.2, 4) + _vqrshl_u8(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v2i32.p0i8")] - fn vst3_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v16i8" + )] + fn _vqrshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vst3_s32_(b.0, b.1, b.2, a as _) + _vqrshlq_u8(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v16i8")] - fn vst3q_s8_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i16" + )] + fn _vqrshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vst3q_s8_(a as _, b.0, b.1, b.2, 1) + _vqrshl_u16(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v16i8.p0i8")] - fn vst3q_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v8i16" + )] + fn _vqrshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vst3q_s8_(b.0, b.1, b.2, a as _) + _vqrshlq_u16(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8i16")] - fn vst3q_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v2i32" + )] + fn _vqrshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vst3q_s16_(a as _, b.0, b.1, b.2, 2) + _vqrshl_u32(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v8i16.p0i8")] - fn vst3q_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v4i32" + )] + fn _vqrshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vst3q_s16_(b.0, b.1, b.2, a as _) + _vqrshlq_u32(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqrshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4i32")] - fn vst3q_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshl.v1i64" + )] + fn _vqrshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } -vst3q_s32_(a as _, b.0, b.1, b.2, 4) + _vqrshl_u64(a.as_signed(), b).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32) +#[doc = "Unsigned signed saturating rounding shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v4i32.p0i8")]
-        fn vst3q_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
+        )]
+        fn _vqrshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-vst3q_s32_(b.0, b.1, b.2, a as _)
+    _vqrshlq_u64(a.as_signed(), b).as_unsigned()
 }
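// Editor's illustration, not part of the generated patch: in the vqrshl
// family the shift counts are per-lane and signed, so a negative count is a
// rounding right shift and one instruction covers both directions.
#[cfg(target_arch = "aarch64")]
unsafe fn qrshl_both_ways() {
    use core::arch::aarch64::*;
    let v = vdup_n_s16(5);
    let left = vqrshl_s16(v, vdup_n_s16(2)); // 5 << 2 = 20, saturating
    let right = vqrshl_s16(v, vdup_n_s16(-1)); // (5 + 1) >> 1 = 3, rounded
    assert_eq!(vget_lane_s16::<0>(left), 20);
    assert_eq!(vget_lane_s16::<0>(right), 3);
}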
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    #[allow(improper_ctypes)]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v1i64")]
-        fn vst3_s64_(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
     }
-vst3_s64_(a as _, b.0, b.1, b.2, 8)
+    _vqrshrn_n_s16(
+        a,
+        const {
+            int16x8_t([
+                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
+                -N as i16,
+            ])
+        },
+    )
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v1i64.p0i8")]
-        fn vst3_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
     }
-vst3_s64_(b.0, b.1, b.2, a as _)
-}
-
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) {
-    vst3_s8(transmute(a), transmute(b))
+    _vqrshrn_n_s32(
+        a,
+        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
+    )
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) {
-    vst3_s16(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqrshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) })
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) {
-    vst3_s32(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
+        )]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqrshrn_n_s16(a, N)
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) {
-    vst3q_s8(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
+        )]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqrshrn_n_s32(a, N)
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) {
-    vst3q_s16(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
+        )]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqrshrn_n_s64(a, N)
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) {
-    vst3q_s32(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn _vqrshrn_n_u16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqrshrn_n_u16(
+        a.as_signed(),
+        const {
+            uint16x8_t([
+                -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
+                -N as u16,
+            ])
+        }
+        .as_signed(),
+    )
+    .as_unsigned()
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) {
-    vst3_s8(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn _vqrshrn_n_u32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqrshrn_n_u32(
+        a.as_signed(),
+        const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed(),
+    )
+    .as_unsigned()
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) {
-    vst3_s16(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn _vqrshrn_n_u64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqrshrn_n_u64(
+        a.as_signed(),
+        const { uint64x2_t([-N as u64, -N as u64]) }.as_signed(),
+    )
+    .as_unsigned()
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) {
-    vst3q_s8(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
+        )]
+        fn _vqrshrn_n_u16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqrshrn_n_u16(a.as_signed(), N).as_unsigned()
 }
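The unsigned variants above are declared against the signed LLVM prototypes and converted at the boundary with the crate-internal `as_signed`/`as_unsigned` transmute helpers. As a reading aid, a scalar model of what each `vqrshrn_n_u16` lane computes, as I read the semantics (the helper name is illustrative, not crate API):

```rust
// Hypothetical scalar model of one UQRSHRN lane: rounding shift right,
// then saturate into the narrower type.
fn uqrshrn_lane(x: u16, n: u32) -> u8 {
    // Adding half of the last shifted-out bit's weight before shifting
    // implements "round half up".
    let rounded = (u32::from(x) + (1 << (n - 1))) >> n;
    // Saturate to u8::MAX instead of truncating the high bits.
    rounded.min(u32::from(u8::MAX)) as u8
}

#[test]
fn uqrshrn_lane_model() {
    assert_eq!(uqrshrn_lane(1000, 2), 250); // exact: 1000 / 4
    assert_eq!(uqrshrn_lane(1002, 2), 251); // 250.5 rounds up
    assert_eq!(uqrshrn_lane(4000, 2), 255); // 1000 saturates to u8::MAX
}
```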
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { - vst3q_s16(transmute(a), transmute(b)) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v4i16" + )] + fn _vqrshrn_n_u32(a: int32x4_t, n: i32) -> int16x4_t; + } + _vqrshrn_n_u32(a.as_signed(), N).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64) +#[doc = "Unsigned signed saturating rounded shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { - vst3_s64(transmute(a), transmute(b)) -} - -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { - vst3_s64(transmute(a), transmute(b)) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqrshrn.v2i32" + )] + fn _vqrshrn_n_u64(a: int64x2_t, n: i32) -> int32x2_t; + } + _vqrshrn_n_u64(a.as_signed(), N).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32) 
+#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - #[allow(improper_ctypes)] +pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!( + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + } + ); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v2f32")] - fn vst3_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] + fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; } -vst3_f32_(a as _, b.0, b.1, b.2, 4) + _vqrshrun_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) + .as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32) +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v2f32.p0i8")] - fn vst3_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] + fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; } -vst3_f32_(b.0, b.1, b.2, a as _) + _vqrshrun_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + .as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32) +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3))] +#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - #[allow(improper_ctypes)] +pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(const { int64x2_t([-N as i64, -N as i64]) }); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4f32")] - fn vst3q_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] + fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; } -vst3q_f32_(a as _, b.0, b.1, b.2, 4) + _vqrshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32) +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - #[allow(improper_ctypes)] +pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3.v4f32.p0i8")] - fn vst3q_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v8i8" + )] + fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> int8x8_t; } -vst3q_f32_(b.0, b.1, b.2, a as _) + _vqrshrun_n_s16(a, N).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8) +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8i8")] - fn vst3_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v4i16" + )] + fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> int16x4_t; } 
-vst3_lane_s8_(a as _, b.0, b.1, b.2, LANE, 1) + _vqrshrun_n_s32(a, N).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8) +#[doc = "Signed saturating rounded shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v8i8.p0i8")] - fn vst3_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqrshrun.v2i32" + )] + fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> int32x2_t; } -vst3_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _) + _vqrshrun_n_s64(a, N).as_unsigned() } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16) +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i16")] - fn vst3_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i32, size: i32); - } -vst3_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + vqshl_s8(a, vdup_n_s8(N as _)) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16) +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's 
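`vqrshrun*` narrows signed input to an unsigned result, so negative lanes clamp to zero while large positive lanes saturate at the unsigned maximum. A usage sketch (mine, not from the patch):

```rust
// Sketch, assuming an AArch64 target with runtime NEON detection.
#[cfg(target_arch = "aarch64")]
fn demo_vqrshrun() {
    if std::arch::is_aarch64_feature_detected!("neon") {
        use std::arch::aarch64::*;
        unsafe {
            // Negative lanes clamp to 0 in the unsigned result.
            let r: uint8x8_t = vqrshrun_n_s16::<2>(vdupq_n_s16(-500));
            assert_eq!(vget_lane_u8::<0>(r), 0);
            // 2000 >> 2 = 500, which saturates to u8::MAX.
            let r = vqrshrun_n_s16::<2>(vdupq_n_s16(2000));
            assert_eq!(vget_lane_u8::<0>(r), u8::MAX);
        }
    }
}
```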
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i16")]
-        fn vst3_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i32, size: i32);
-    }
-vst3_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_s8(a, vdup_n_s8(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v4i16.p0i8")]
-        fn vst3_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8);
-    }
-vst3_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_s8(a, vdupq_n_s8(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v2i32")]
-        fn vst3_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i32, size: i32);
-    }
-vst3_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_s16(a, vdup_n_s16(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v2i32.p0i8")]
-        fn vst3_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8);
-    }
-vst3_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_s16(a, vdupq_n_s16(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8i16")]
-        fn vst3q_lane_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i32, size: i32);
-    }
-vst3q_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_s32(a, vdup_n_s32(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v8i16.p0i8")]
-        fn vst3q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8);
-    }
-vst3q_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_s32(a, vdupq_n_s32(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i32")]
-        fn vst3q_lane_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i32, size: i32);
-    }
-vst3q_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_s64(a, vdup_n_s64(N as _))
 }
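The `vqshl_n_*` immediates above are thin wrappers that splat `N` and defer to the register-shift form, with `static_assert_uimm_bits!` bounding `N` to the element's bit width (0..=7 for 8-bit lanes, 0..=15 for 16-bit lanes, and so on). A usage sketch (mine, not from the patch):

```rust
// Sketch, assuming an AArch64 target with runtime NEON detection.
#[cfg(target_arch = "aarch64")]
fn demo_vqshl_n() {
    if std::arch::is_aarch64_feature_detected!("neon") {
        use std::arch::aarch64::*;
        unsafe {
            // 5 << 2 = 20 fits in i8, so the lane is shifted exactly.
            let r = vqshl_n_s8::<2>(vdup_n_s8(5));
            assert_eq!(vget_lane_s8::<0>(r), 20);
            // 100 << 2 = 400 overflows i8 and saturates to i8::MAX.
            let r = vqshl_n_s8::<2>(vdup_n_s8(100));
            assert_eq!(vget_lane_s8::<0>(r), i8::MAX);
        }
    }
}
```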
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v4i32.p0i8")]
-        fn vst3q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8);
-    }
-vst3q_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_s64(a, vdupq_n_s64(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3_lane_s8::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_u8(a, vdup_n_s8(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst3_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_u8(a, vdupq_n_s8(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst3_lane_s32::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_u16(a, vdup_n_s16(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3q_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_u16(a, vdupq_n_s16(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst3q_lane_s32::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_u32(a, vdup_n_s32(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3_lane_s8::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_u32(a, vdupq_n_s32(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x3_t) {
-    static_assert_uimm_bits!(LANE, 2);
-    vst3_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_u64(a, vdup_n_s64(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst3q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x3_t) {
-    static_assert_uimm_bits!(LANE, 3);
-    vst3q_lane_s16::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_u64(a, vdupq_n_s64(N as _))
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v2f32")]
-        fn vst3_lane_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i32, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v8i8"
+        )]
+        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-vst3_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4)
+    _vqshl_s8(a, b)
 }
-/// Store multiple 3-element structures from three registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v2f32.p0i8")]
-        fn vst3_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v16i8"
+        )]
+        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-vst3_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _)
+    _vqshlq_s8(a, b)
 }
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4f32")] - fn vst3q_lane_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v4i16" + )] + fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vst3q_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4) + _vqshl_s16(a, b) } -/// Store multiple 3-element structures from three registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32) +#[doc = "Signed saturating shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st3, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst3q_lane_f32(a: *mut f32, b: float32x4x3_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st3lane.v4f32.p0i8")] - fn vst3q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshl.v8i16" + )] + fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vst3q_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) + _vqshlq_s16(a, b) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's 
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8i8")]
-        fn vst4_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v2i32"
+        )]
+        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    vst4_s8_(a as _, b.0, b.1, b.2, b.3, 1)
+    _vqshl_s32(a, b)
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v8i8.p0i8")]
-        fn vst4_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v4i32"
+        )]
+        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    vst4_s8_(b.0, b.1, b.2, b.3, a as _)
+    _vqshlq_s32(a, b)
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4i16")]
-        fn vst4_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v1i64"
+        )]
+        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-    vst4_s16_(a as _, b.0, b.1, b.2, b.3, 2)
+    _vqshl_s64(a, b)
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v4i16.p0i8")]
-        fn vst4_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v2i64"
+        )]
+        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-    vst4_s16_(b.0, b.1, b.2, b.3, a as _)
+    _vqshlq_s64(a, b)
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2i32")]
-        fn vst4_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v8i8"
+        )]
+        fn _vqshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    vst4_s32_(a as _, b.0, b.1, b.2, b.3, 4)
+    _vqshl_u8(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v2i32.p0i8")]
-        fn vst4_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v16i8"
+        )]
+        fn _vqshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    vst4_s32_(b.0, b.1, b.2, b.3, a as _)
+    _vqshlq_u8(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v16i8")]
-        fn vst4q_s8_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v4i16"
+        )]
+        fn _vqshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    vst4q_s8_(a as _, b.0, b.1, b.2, b.3, 1)
+    _vqshl_u16(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v16i8.p0i8")]
-        fn vst4q_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v8i16"
+        )]
+        fn _vqshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    vst4q_s8_(b.0, b.1, b.2, b.3, a as _)
+    _vqshlq_u16(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8i16")]
-        fn vst4q_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v2i32"
+        )]
+        fn _vqshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    vst4q_s16_(a as _, b.0, b.1, b.2, b.3, 2)
+    _vqshl_u32(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v8i16.p0i8")]
-        fn vst4q_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v4i32"
+        )]
+        fn _vqshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
    }
-    vst4q_s16_(b.0, b.1, b.2, b.3, a as _)
+    _vqshlq_u32(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4i32")]
-        fn vst4q_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v1i64"
+        )]
+        fn _vqshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-    vst4q_s32_(a as _, b.0, b.1, b.2, b.3, 4)
+    _vqshl_u64(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v4i32.p0i8")]
-        fn vst4q_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v2i64"
+        )]
+        fn _vqshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-    vst4q_s32_(b.0, b.1, b.2, b.3, a as _)
+    _vqshlq_u64(a.as_signed(), b).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    #[allow(improper_ctypes)]
+pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v1i64")]
-        fn vst4_s64_(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, size: i32);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> int8x8_t;
     }
-    vst4_s64_(a as _, b.0, b.1, b.2, b.3, 8)
+    _vqshlu_n_s8(
+        a,
+        const {
+            int8x8_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v1i64.p0i8")]
-        fn vst4_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8);
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> int8x16_t;
     }
-    vst4_s64_(b.0, b.1, b.2, b.3, a as _)
+    _vqshluq_n_s8(
+        a,
+        const {
+            int8x16_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) {
-    vst4_s8(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> int16x4_t;
+    }
+    _vqshlu_n_s16(
+        a,
+        const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) {
-    vst4_s16(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> int16x8_t;
+    }
+    _vqshluq_n_s16(
+        a,
+        const {
+            int16x8_t([
+                N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
+            ])
+        },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) {
-    vst4_s32(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> int32x2_t;
+    }
+    _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) {
-    vst4q_s8(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> int32x4_t;
+    }
+    _vqshluq_n_s32(
+        a,
+        const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) {
-    vst4q_s16(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> int64x1_t;
+    }
+    _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) {
-    vst4q_s32(transmute(a), transmute(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> int64x2_t;
    }
+    _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }).as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) {
-    vst4_s8(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
+        )]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> int8x8_t;
+    }
+    _vqshlu_n_s8(
+        a,
+        const {
+            int8x8_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) {
-    vst4_s16(transmute(a), transmute(b))
-}
-
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) {
-    vst4q_s8(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
+        )]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> int8x16_t;
+    }
+    _vqshluq_n_s8(
+        a,
+        const {
+            int8x16_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) {
-    vst4q_s16(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
+        )]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> int16x4_t;
+    }
+    _vqshlu_n_s16(
+        a,
+        const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) {
-    vst4_s64(transmute(a), transmute(b))
-}
-
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
-    vst4_s64(transmute(a), transmute(b))
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
+        )]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> int16x8_t;
+    }
+    _vqshluq_n_s16(
+        a,
+        const {
+            int16x8_t([
+                N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
+            ])
+        },
+    )
+    .as_unsigned()
 }
-/// Store multiple 4-element structures from four registers
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vst4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2f32")]
-        fn vst4_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, size: i32);
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
"llvm.aarch64.neon.sqshlu.v2i32" + )] + fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> int32x2_t; } -vst4_f32_(a as _, b.0, b.1, b.2, b.3, 4) + _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32) +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - #[allow(improper_ctypes)] +pub unsafe fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v2f32.p0i8")] - fn vst4_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v4i32" + )] + fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> int32x4_t; } -vst4_f32_(b.0, b.1, b.2, b.3, a as _) + _vqshluq_n_s32( + a, + const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }, + ) + .as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32) +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4))] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4f32")] - fn vst4q_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, size: i32); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v1i64" + )] + fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> int64x1_t; } -vst4q_f32_(a as _, b.0, b.1, b.2, b.3, 4) + _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32) +#[doc = "Signed saturating shift left unsigned"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
-#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4))] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - #[allow(improper_ctypes)] +pub unsafe fn vqshluq_n_s64(a: int64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4.v4f32.p0i8")] - fn vst4q_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshlu.v2i64" + )] + fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> int64x2_t; } -vst4q_f32_(b.0, b.1, b.2, b.3, a as _) + _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i8")] - fn vst4_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] + fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; } -vst4_lane_s8_(a as _, b.0, b.1, b.2, b.3, LANE, 1) + _vqshrn_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); extern 
"unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v8i8.p0i8")] - fn vst4_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] + fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; } -vst4_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrn_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i16")] - fn vst4_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] + fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; } -vst4_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vqshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v4i16.p0i8")] - fn vst4_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i64, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v8i8" + )] + fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t; } -vst4_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrn_n_s16(a, N) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v2i32")] - fn vst4_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i32, size: i32); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v4i16" + )] + fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t; } -vst4_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vqshrn_n_s32(a, N) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32) +#[doc = "Signed saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v2i32.p0i8")] - fn vst4_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i64, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrn.v2i32" + )] + fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t; } -vst4_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrn_n_s64(a, N) } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] 
+#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i16")] - fn vst4q_lane_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] + fn _vqshrn_n_u16(a: int16x8_t, n: int16x8_t) -> int8x8_t; } -vst4q_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) + _vqshrn_n_u16( + a.as_signed(), + const { + uint16x8_t([ + -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, + -N as u16, + ]) + }, + ) + .as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v8i16.p0i8")] - fn vst4q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i64, ptr: *mut i8); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] + fn _vqshrn_n_u32(a: int32x4_t, n: int32x4_t) -> int16x4_t; } -vst4q_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrn_n_u32( + a.as_signed(), + const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }, + ) + .as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i32")] - fn vst4q_lane_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] + fn _vqshrn_n_u64(a: int64x2_t, n: int64x2_t) -> int32x2_t; } -vst4q_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vqshrn_n_u64(a.as_signed(), const { uint64x2_t([-N as u64, -N as u64]) }).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v4i32.p0i8")] - fn vst4q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i64, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v8i8" + )] + fn _vqshrn_n_u16(a: int16x8_t, n: i32) -> int8x8_t; } -vst4q_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrn_n_u16(a.as_signed(), N).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4_lane_s8::(transmute(a), transmute(b)) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v4i16" + )] + fn _vqshrn_n_u32(a: int32x4_t, n: i32) -> int16x4_t; + } + _vqshrn_n_u32(a.as_signed(), N).as_unsigned() } -/// Store 
multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16) +#[doc = "Unsigned saturating shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4_lane_s16::<LANE>(transmute(a), transmute(b)) +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqshrn.v2i32" + )] + fn _vqshrn_n_u64(a: int64x2_t, n: i32) -> int32x2_t; + } + _vqshrn_n_u64(a.as_signed(), N).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - vst4_lane_s32::<LANE>(transmute(a), transmute(b)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] + fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } + _vqshrun_n_s16( + a, + const { + int16x8_t([ + -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, + -N as i16, + ]) + }, + ) + .as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4q_lane_s16::(transmute(a), transmute(b)) -} - -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4q_lane_s32::(transmute(a), transmute(b)) -} - -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4_lane_s8::(transmute(a), transmute(b)) -} - -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - vst4_lane_s16::(transmute(a), transmute(b)) -} - -/// Store multiple 4-element structures from four registers -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vst4q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x4_t) { - static_assert_uimm_bits!(LANE, 3); - vst4q_lane_s16::<LANE>(transmute(a), transmute(b)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] + fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } + _vqshrun_n_s32( + a, + const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }, + ) + .as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v2f32")] - fn vst4_lane_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i32, size: i32); + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] + fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t; } -vst4_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vqshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub 
unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) { - static_assert_uimm_bits!(LANE, 1); - #[allow(improper_ctypes)] +pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v2f32.p0i8")] - fn vst4_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i64, ptr: *mut i8); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v8i8" + )] + fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> int8x8_t; } -vst4_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrun_n_s16(a, N).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vst4, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4f32")] - fn vst4q_lane_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, n: i32, size: i32); + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v4i16" + )] + fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> int16x4_t; } -vst4q_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) + _vqshrun_n_s32(a, N).as_unsigned() } -/// Store multiple 4-element structures from four registers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32) +#[doc = "Signed saturating shift right unsigned narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(st4, LANE = 0))] -#[rustc_legacy_const_generics(2)] +#[cfg(not(target_arch = "arm"))] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) { - static_assert_uimm_bits!(LANE, 2); - #[allow(improper_ctypes)] +pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.st4lane.v4f32.p0i8")] - fn vst4q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, n: i64, ptr: *mut i8); + 
#[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqshrun.v2i32" + )] + fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> int32x2_t; } -vst4q_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) + _vqshrun_n_s64(a, N).as_unsigned() } -/// Dot product vector form with unsigned and signed integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usdot))] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8")] - fn vusdot_s32_(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")] + fn _vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vusdot_s32_(a, b, c) + _vqsub_s8(a, b) } -/// Dot product vector form with unsigned and signed integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usdot))] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8")] - fn vusdotq_s32_(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")] + fn _vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vusdotq_s32_(a, b, c) -} - -/// Dot product index form with unsigned and signed integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32) -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usdot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) -} - -/// Dot product index form with unsigned and signed integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32) -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usdot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = transmute(c); - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) -} - -/// Dot product index form with signed and unsigned integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32) -#[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sudot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = 
"117223"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, transmute(c), b) + _vqsubq_s8(a, b) } -/// Dot product index form with signed and unsigned integers -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,i8mm")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sudot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = transmute(c); - let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, transmute(c), b) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.4i16")] + fn _vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vqsub_s16(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + 
not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.8i16")] + fn _vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vqsubq_s16(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.2i32")] + fn _vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vqsub_s32(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t 
{ + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")] + fn _vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vqsubq_s32(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")] + fn _vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + _vqsub_s64(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sqsub.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")] + fn _vqsubq_s64(a: int64x2_t, b: 
int64x2_t) -> int64x2_t; + } + _vqsubq_s64(a, b) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v8i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")] + fn _vqsub_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vqsub_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v16i8" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")] + fn _vqsubq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vqsubq_u8(a.as_signed(), b.as_signed()).as_unsigned() } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8) +#[doc = "Saturating 
subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.4i16")] + fn _vqsub_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vqsub_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.8i16")] + fn _vqsubq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vqsubq_u16(a.as_signed(), b.as_signed()).as_unsigned() } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.2i32")] + fn _vqsub_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vqsub_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.4i32")] + fn _vqsubq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vqsubq_u32(a.as_signed(), b.as_signed()).as_unsigned() } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vmul.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v1i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.1i64")] + fn _vqsub_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + _vqsub_u64(a.as_signed(), b.as_signed()).as_unsigned() } -/// Polynomial multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8) +#[doc = "Saturating subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(pmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uqsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.pmul.v8i8")] - fn vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.uqsub.v2i64" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.2i64")] + fn _vqsubq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t; } -vmul_p8_(a, b) + _vqsubq_u64(a.as_signed(), b.as_signed()).as_unsigned() } -/// Polynomial multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8) +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(pmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.pmul.v16i8")] - fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v2f32" + )] + fn _vrecpe_f32(a: float32x2_t) -> float32x2_t; } -vmulq_p8_(a, b) + _vrecpe_f32(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32) +#[doc = "Reciprocal estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecpe.v4f32" + )] + fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t; + } + _vrecpeq_f32(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32) +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_mul(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v2i32" + )] + fn _vrecpe_u32(a: int32x2_t) -> int32x2_t; + } + _vrecpe_u32(a.as_signed()).as_unsigned() } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16) +#[doc = "Unsigned reciprocal estimate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - simd_mul(a, vdup_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urecpe) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urecpe.v4i32" + )] + fn _vrecpeq_u32(a: int32x4_t) -> int32x4_t; + } + _vrecpeq_u32(a.as_signed()).as_unsigned() } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16) +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - simd_mul(a, vdupq_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v2f32" + )] + fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vrecps_f32(a, b) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32) +#[doc = "Floating-point reciprocal step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - simd_mul(a, vdup_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frecps) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frecps.v4f32" + )] + fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vrecpsq_f32(a, b) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - simd_mul(a, vdupq_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { + transmute(a) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { - simd_mul(a, vdup_n_u16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + transmute(a) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { - simd_mul(a, vdupq_n_u16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + transmute(a) } -/// Vector multiply by scalar -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { - simd_mul(a, vdup_n_u32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + transmute(a) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { - simd_mul(a, vdupq_n_u32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + transmute(a) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { - simd_mul(a, vdup_n_f32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { + transmute(a) } -/// Vector multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { - simd_mul(a, vdupq_n_f32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + 
transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable 
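
Every `vreinterpret*` added in this hunk compiles to no instruction at all (hence the `assert_instr(nop)` checks): it relabels the same 64 bits with a different lane type. A small sketch of what that means at a call site (the helper name is illustrative):

```
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;

// Reinterpreting f32 lanes as u32 lanes leaves the bits untouched:
// 1.0f32 is 0x3F80_0000, and exactly that pattern comes back out.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn f32_bit_pattern() -> u32 {
    let v: float32x2_t = vdup_n_f32(1.0);
    let bits: uint32x2_t = vreinterpret_u32_f32(v);
    vget_lane_u32::<0>(bits) // 0x3F80_0000
}
```
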
= "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { - static_assert_uimm_bits!(LANE, 3); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, 
LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, 
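
The removed `vmul_lane_*` bodies all share one shape: assert the lane index at compile time with `static_assert_uimm_bits!`, splat that lane of `b` with `simd_shuffle!`, then `simd_mul`. From the caller's side the lane is a const generic; a sketch with an illustrative helper:

```
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;

// Multiply every lane of `a` by lane 1 of `b`; an out-of-range lane
// index is rejected at compile time, not at run time.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn scale_by_lane(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
    vmul_lane_u32::<1>(a, b)
}
```
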
[LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + transmute(a) } -/// Multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(mul, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { + transmute(a) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + transmute(a) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { + transmute(a) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 1); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + transmute(a) } -/// Floating-point multiply -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
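
The `transmute(a)` bodies are sound only because every reinterpret pairs two vector types of identical size; the cast changes how 64 bits are sliced into lanes, never how many bits there are. That invariant can be stated as a compile-time check:

```
// transmute refuses to compile if the two sizes ever diverge, so the
// generated reinterprets cannot silently truncate or pad.
#[cfg(target_arch = "aarch64")]
const _: () = {
    use core::arch::aarch64::{int16x4_t, int8x8_t};
    assert!(core::mem::size_of::<int8x8_t>() == core::mem::size_of::<int16x4_t>());
};
```
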
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmul, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { + transmute(a) } -/// Signed multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smull.v8i8")] - fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t; - } -vmull_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { + transmute(a) } -/// Signed multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull))] -#[cfg_attr(not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smull.v4i16")] - fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; - } -vmull_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + transmute(a) } -/// Signed multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smull.v2i32")] - fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; - } -vmull_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + transmute(a) } -/// Unsigned multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub 
unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umull.v8i8")] - fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - } -vmull_u8_(a, b) -} - -/// Unsigned multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + transmute(a) +} + +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umull.v4i16")] - fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - } -vmull_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + transmute(a) } -/// Unsigned multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vmullu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umull.v2i32")] - fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - } -vmull_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { + transmute(a) } -/// Polynomial multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(pmull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.pmull.v8i8")] - fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; - } -vmull_p8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + transmute(a) } -/// Vector long multiply with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vmull_s16(a, vdup_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch 
= "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { + transmute(a) } -/// Vector long multiply with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { - vmull_s32(a, vdup_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + transmute(a) } -/// Vector long multiply with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { - vmull_u16(a, vdup_n_u16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + transmute(a) } -/// Vector long multiply with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { - vmull_u32(a, vdup_n_u32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmull_s16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 3); - 
vmull_s16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_laneq_s32(a: int32x2_t, b: int32x4_t) -> int64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800") +)] +pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 2); - vmull_u16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 3); - vmull_u16(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + transmute(a) } -/// Vector long multiply by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umull, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 2); - vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { + transmute(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(fmla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.v2f32")] - fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; - } -vfma_f32_(b, c, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + transmute(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.fma.v4f32")] - fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; - } -vfmaq_f32_(b, c, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { + transmute(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] 
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfma_f32(a, b, vdup_n_f32_vfp4(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { + transmute(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmla))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + transmute(a) } -/// Floating-point fused multiply-subtract from accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { - let b: float32x2_t = simd_neg(b); - vfma_f32(a, b, c) +#[cfg_attr(target_arch = "arm", target_feature(enable = 
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { + transmute(a) } -/// Floating-point fused multiply-subtract from accumulator -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { - let b: float32x4_t = simd_neg(b); - vfmaq_f32(a, b, c) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + transmute(a) } -/// Floating-point fused Multiply-subtract to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - vfms_f32(a, b, vdup_n_f32_vfp4(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + transmute(a) } 
-/// Floating-point fused Multiply-subtract to accumulator(vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmls))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - vfmsq_f32(a, b, vdupq_n_f32_vfp4(c)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { + transmute(a) } -/// Subtract -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] 
-pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { + transmute(a) } -/// Subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vadd_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - simd_xor(a, b) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vadd_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - simd_xor(a, b) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vaddq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - simd_xor(a, b) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vaddq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - simd_xor(a, b) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { - simd_xor(a, b) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { - simd_xor(a, b) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { + transmute(a) } -/// Bitwise exclusive OR -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddq_p128) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vaddq_p128(a: p128, b: p128) -> p128 { - a ^ b +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { - let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { - let c: i32x4 = i32x4::new(16, 16, 16, 16); - simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { - let c: i64x2 = i64x2::new(32, 32); - simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); - simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - let c: u32x4 = u32x4::new(16, 16, 16, 16); - 
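// A portable sketch of what the vreinterpret*_* intrinsics above do: the
// register bytes are untouched and only the lane type changes, hence the
// plain `transmute(a)` bodies and the `nop` in assert_instr. The array types
// below stand in for the NEON vector types and are illustrative only.
fn main() {
    let a: [i32; 4] = [1, -1, 0x0102_0304, i32::MIN];
    // Same 16 bytes, viewed as sixteen u8 lanes (cf. vreinterpretq_u8_s32).
    let b: [u8; 16] = unsafe { core::mem::transmute(a) };
    assert_eq!(&b[..4], &1i32.to_ne_bytes());
}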
simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - let c: u64x2 = u64x2::new(32, 32); - simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { - let d: int8x8_t = vsubhn_s16(b, c); - simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// 
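// Per-lane scalar model of the removed vsubhn_* bodies above: subtract, then
// keep the high half of each difference ((a - b) >> 8 for 16-bit lanes,
// narrowed to i8). A sketch for intuition, assuming wrapping element
// arithmetic like simd_sub.
fn subhn_s16_lane(a: i16, b: i16) -> i8 {
    (a.wrapping_sub(b) >> 8) as i8
}

fn main() {
    assert_eq!(subhn_s16_lane(0x7E00, -0x0100), 0x7F);
}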
[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { - let d: int16x4_t = vsubhn_s32(b, c); - simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { - let d: int32x2_t = vsubhn_s64(b, c); - simd_shuffle!(a, d, [0, 1, 2, 3]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), assert_instr(subhn2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { - let d: uint8x8_t = vsubhn_u16(b, c); - simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { - let d: uint16x4_t = vsubhn_u32(b, c); - simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { + transmute(a) } -/// Subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(subhn2))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { - let d: uint32x2_t = vsubhn_u64(b, c); - simd_shuffle!(a, d, [0, 1, 2, 3]) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhsub.v8i8")] - fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vhsub_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhsub.v16i8")] - fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vhsubq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhsub.v4i16")] - fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vhsub_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhsub.v8i16")] - fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vhsubq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhsub.v2i32")] - fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vhsub_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uhsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uhsub.v4i32")] - fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vhsubq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shsub.v8i8")] - fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vhsub_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shsub.v16i8")] - fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vhsubq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shsub.v4i16")] - fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vhsub_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shsub.v8i16")] - fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vhsubq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"] +#[doc = "## Safety"] +#[doc = " * 
Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shsub.v2i32")] - fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vhsub_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { + transmute(a) } -/// Signed halving subtract -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shsub))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.shsub.v4i32")] - fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vhsubq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { + transmute(a) } -/// Signed Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
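// The removed vhsub_*/vhsubq_* wrappers above lower to LLVM's halving
// subtract intrinsics. Per lane they compute (a - b) >> 1 as if in wider
// arithmetic, so the intermediate difference cannot overflow. A signed
// scalar sketch under that reading:
fn hsub_s8_lane(a: i8, b: i8) -> i8 {
    (((a as i16) - (b as i16)) >> 1) as i8
}

fn main() {
    // -128 - 127 = -255 does not fit in i8; the widened halving form does.
    assert_eq!(hsub_s8_lane(-128, 127), -128);
}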
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssubw))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t { + transmute(a) } -/// Signed Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssubw))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { + transmute(a) } -/// Signed Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssubw))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { + transmute(a) } -/// Unsigned Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usubw))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { + transmute(a) } -/// Unsigned Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usubw))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { + transmute(a) } -/// Unsigned Subtract Wide -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, any(target_arch = "aarch64", 
target_arch = "arm64ec")), assert_instr(usubw))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { + transmute(a) } -/// Signed Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssubl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - let c: int16x8_t = simd_cast(a); - let d: int16x8_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssubl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - let c: int32x4_t = simd_cast(a); - let d: int32x4_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { + transmute(a) } -/// Signed Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssubl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - let c: int64x2_t = simd_cast(a); - let d: int64x2_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { + transmute(a) } -/// Unsigned Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usubl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - let c: uint16x8_t = simd_cast(a); - let d: uint16x8_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { + transmute(a) } -/// Unsigned Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usubl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - let c: uint32x4_t = simd_cast(a); - let d: uint32x4_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { + transmute(a) } -/// Unsigned Subtract Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usubl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - let c: uint64x2_t = simd_cast(a); - let d: uint64x2_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { + transmute(a) } -/// Dot product arithmetic (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sdot))] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")] - #[cfg_attr(any(target_arch 
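// The removed vsubl_* bodies widen both operands before subtracting
// (simd_cast on each input, then simd_sub), so i8 lanes can produce the
// full i16 range without wrapping. Scalar sketch per lane:
fn subl_s8_lane(a: i8, b: i8) -> i16 {
    a as i16 - b as i16
}

fn main() {
    assert_eq!(subl_s8_lane(-128, 127), -255);
}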
= "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8")] - fn vdot_s32_(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t; - } -vdot_s32_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + transmute(a) } -/// Dot product arithmetic (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sdot))] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8")] - fn vdotq_s32_(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; - } -vdotq_s32_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + transmute(a) } -/// Dot product arithmetic (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(udot))] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { 
- #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.udot.v2i32.v8i8")] - fn vdot_u32_(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t; - } -vdot_u32_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + transmute(a) } -/// Dot product arithmetic (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(udot))] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.udot.v4i32.v16i8")] - fn vdotq_u32_(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; - } -vdotq_u32_(a, b, c) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + transmute(a) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sdot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), 
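// Per-lane model of the removed vdot_s32 above: each 32-bit accumulator lane
// adds the dot product of the corresponding four signed bytes of b and c.
// Scalar sketch, illustrative only:
fn dot_s32_lane(acc: i32, b: [i8; 4], c: [i8; 4]) -> i32 {
    acc + (0..4).map(|i| b[i] as i32 * c[i] as i32).sum::<i32>()
}

fn main() {
    assert_eq!(dot_s32_lane(10, [1, 2, 3, 4], [1, 1, 1, 1]), 20);
}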
unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = transmute(c); - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + transmute(a) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sdot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = transmute(c); - let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + transmute(a) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(udot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = 
"stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { - static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = transmute(c); - let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { + transmute(a) } -/// Dot product arithmetic (indexed) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,dotprod")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(udot, LANE = 0))] -#[rustc_legacy_const_generics(3)] -#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_dotprod", issue = "117224"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { - static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = transmute(c); - let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smax.v8i8")] - fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vmax_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smax.v16i8")] - fn vmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vmaxq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", 
link_name = "llvm.arm.neon.vmaxs.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smax.v4i16")] - fn vmax_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vmax_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smax.v8i16")] - fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vmaxq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smax.v2i32")] - fn vmax_s32_(a: int32x2_t, b: 
int32x2_t) -> int32x2_t; - } -vmax_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smax.v4i32")] - fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vmaxq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umax.v8i8")] - fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vmax_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umax.v16i8")] - fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vmaxq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umax.v4i16")] - fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vmax_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umax.v8i16")] - fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vmaxq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umax.v2i32")] - fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vmax_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + transmute(a) } -/// Maximum (vector) -/// -/// 
[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umax.v4i32")] - fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vmaxq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmax.v2f32")] - fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vmax_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { + transmute(a) } -/// Maximum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmax))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmax.v4f32")] - fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vmaxq_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + transmute(a) } -/// Floating-point Maximum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmaxnm))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] - fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vmaxnm_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { + transmute(a) } -/// Floating-point Maximum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmaxnm))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] - fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vmaxnmq_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smin.v8i8")] - fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vmin_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smin.v16i8")] - fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vminq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smin.v4i16")] - fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vmin_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
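// A minimal sketch of what every vreinterpret*/vreinterpretq* intrinsic in
// this block does (hypothetical values; assumes a little-endian aarch64
// target): the cast is a pure bitwise transmute between equally sized vectors
// (64 bits for the non-q forms, 128 bits for the q forms), so no instruction
// is emitted -- hence assert_instr(nop).
//
//     let a: uint16x8_t = vdupq_n_u16(0x0102);    // eight lanes of 0x0102
//     let b: int8x16_t = vreinterpretq_s8_u16(a); // same 128 bits as 16 x i8
//     assert_eq!(vgetq_lane_s8::<0>(b), 0x02);    // low byte first on little-endian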
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smin.v8i16")] - fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vminq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smin.v2i32")] - fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vmin_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(smin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.smin.v4i32")] - fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vminq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umin.v8i8")] - fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vmin_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch 
= "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umin.v16i8")] - fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vminq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umin.v4i16")] - fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vmin_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - 
extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umin.v8i16")] - fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } -vminq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.umin.v2i32")] - fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vmin_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(umin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name 
= "llvm.aarch64.neon.umin.v4i32")] - fn vminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vminq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmin.v2f32")] - fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vmin_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { + transmute(a) } -/// Minimum (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fmin))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fmin.v4f32")] - fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vminq_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { + transmute(a) } -/// Floating-point Minimum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fminnm))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnm.v2f32")] - fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vminnm_f32_(a, b) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { + transmute(a) } -/// Floating-point Minimum Number (vector) -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(fminnm))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.fminnm.v4f32")] - fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } -vminnmq_f32_(a, b) +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { + transmute(a) } -/// Floating-point add pairwise -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(faddp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.faddp.v2f32")] - fn vpadd_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } -vpadd_f32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { + transmute(a) } -/// Signed saturating doubling multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmull.v4i32")] - fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; - } -vqdmull_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + 
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { + transmute(a) } -/// Signed saturating doubling multiply long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmull.v2i64")] - fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; - } -vqdmull_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { + transmute(a) } -/// Vector saturating doubling long multiply with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmull))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { - vqdmull_s16(a, vdup_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { + transmute(a) 
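// Per-lane model of the saturating doubling multiply long (vqdmull_*) being
// moved out of this section: widen, multiply, double, saturate. Only
// i16::MIN * i16::MIN can overflow the widened result. Sketch in plain Rust
// (helper name hypothetical, not an API of this crate):
//
//     fn qdmull_lane(a: i16, b: i16) -> i32 {
//         // The widened product is at most 2^30, so only the doubling saturates.
//         (a as i32 * b as i32).saturating_mul(2)
//     }
//     assert_eq!(qdmull_lane(i16::MIN, i16::MIN), i32::MAX);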

-/// Vector saturating doubling long multiply with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmull))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vqdmull_s32(a, vdup_n_s32(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+    transmute(a)
 }

-/// Vector saturating doubling long multiply by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+    transmute(a)
 }

-/// Vector saturating doubling long multiply by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmull, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply-add long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_n_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlal))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_n_s32(b, c))
-}
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+    transmute(a)
+}

-/// Vector widening saturating doubling multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlal, N = 2))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlal, N = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlsl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply-subtract long
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlsl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlsl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_n_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlsl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_n_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlsl, N = 2))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+    transmute(a)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmlsl, N = 1))]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmulh.v4i16")]
-        fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vqdmulh_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmulh.v8i16")]
-        fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vqdmulhq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmulh.v2i32")]
-        fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vqdmulh_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+    transmute(a)
 }

-/// Signed saturating doubling multiply returning high half
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqdmulh.v4i32")]
-        fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vqdmulhq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmulh_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmulh_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    let b: int16x8_t = vdupq_n_s16(b);
-    vqdmulhq_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high with scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    let b: int32x4_t = vdupq_n_s32(b);
-    vqdmulhq_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+    transmute(a)
 }
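// The vqdmulh_* removals above all wrap the same operation: keep only the
// high half of the doubled product (a Q15 fixed-point multiply). Scalar
// sketch, widening to i64 so the i16::MIN * i16::MIN case cannot overflow the
// intermediate (helper name hypothetical):
//
//     fn qdmulh_lane(a: i16, b: i16) -> i16 {
//         let p = (a as i64) * (b as i64) * 2; // doubled product, Q31
//         (p >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
//     }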

-/// Vector saturating doubling multiply high by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+    transmute(a)
 }

-/// Vector saturating doubling multiply high by scalar
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+    transmute(a)
 }
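// The *_lane_* / *_laneq_* variants above select one lane of `b` at compile
// time: the lane index is a const generic (the `<const LANE: i32>` and `::<N>`
// spellings restored above, which the extraction had stripped), range-checked
// by static_assert_uimm_bits! and kept callable in the old positional style
// via #[rustc_legacy_const_generics]. Call-site sketch (values illustrative):
//
//     let r = vqdmulh_laneq_s16::<3>(a, b); // multiply every lane of a by lane 3 of b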
"v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqxtn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqxtn.v8i8")] - fn vqmovn_s16_(a: int16x8_t) -> int8x8_t; - } -vqmovn_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + transmute(a) } -/// Signed saturating extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqxtn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqxtn.v4i16")] - fn vqmovn_s32_(a: int32x4_t) -> int16x4_t; - } -vqmovn_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + transmute(a) } -/// Signed saturating extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqxtn))] -#[cfg_attr(not(target_arch = 
"arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqxtn.v2i32")] - fn vqmovn_s64_(a: int64x2_t) -> int32x2_t; - } -vqmovn_s64_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + transmute(a) } -/// Unsigned saturating extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqxtn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqxtn.v8i8")] - fn vqmovn_u16_(a: uint16x8_t) -> uint8x8_t; - } -vqmovn_u16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { + transmute(a) } -/// Unsigned saturating extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqxtn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
vqmovn_u32(a: uint32x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqxtn.v4i16")] - fn vqmovn_u32_(a: uint32x4_t) -> uint16x4_t; - } -vqmovn_u32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + transmute(a) } -/// Unsigned saturating extract narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqxtn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqxtn.v2i32")] - fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; - } -vqmovn_u64_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { + transmute(a) } -/// Signed saturating extract unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqxtun))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] - 
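// vqmovn_* / vqmovun_* narrow each lane to half width with saturation; the
// `un` forms clamp a signed input into the unsigned range. Scalar sketch of
// the two behaviours (helper names hypothetical):
//
//     fn qmovn_s16(x: i16) -> i8 { x.clamp(i8::MIN as i16, i8::MAX as i16) as i8 }
//     fn qmovun_s16(x: i16) -> u8 { x.clamp(0, u8::MAX as i16) as u8 }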
#[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqxtun.v8i8")] - fn vqmovun_s16_(a: int16x8_t) -> uint8x8_t; - } -vqmovun_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + transmute(a) } -/// Signed saturating extract unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqxtun))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqxtun.v4i16")] - fn vqmovun_s32_(a: int32x4_t) -> uint16x4_t; - } -vqmovun_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { + transmute(a) } -/// Signed saturating extract unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqxtun))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqxtun.v2i32")] - fn vqmovun_s64_(a: int64x2_t) -> 
uint32x2_t; - } -vqmovun_s64_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { + transmute(a) } -/// Signed saturating rounding doubling multiply returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmulh.v4i16")] - fn vqrdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqrdmulh_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + transmute(a) } -/// Signed saturating rounding doubling multiply returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmulh.v8i16")] - fn vqrdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqrdmulhq_s16_(a, 
b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + transmute(a) } -/// Signed saturating rounding doubling multiply returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmulh.v2i32")] - fn vqrdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqrdmulh_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + transmute(a) } -/// Signed saturating rounding doubling multiply returning high half -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrdmulh.v4i32")] - fn vqrdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vqrdmulhq_s32_(a, b) +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + transmute(a) } -/// Vector saturating rounding doubling multiply high with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t { - vqrdmulh_s16(a, vdup_n_s16(b)) -} +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { + transmute(a) +} -/// Vector saturating rounding doubling multiply high with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { - vqrdmulhq_s16(a, vdupq_n_s16(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + transmute(a) } -/// Vector saturating rounding doubling multiply high with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32) +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t { - vqrdmulh_s32(a, vdup_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { + transmute(a) } -/// Vector saturating rounding doubling multiply high with scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { - vqrdmulhq_s32(a, vdupq_n_s32(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_lane_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 2); - let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulh_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_laneq_s16(a: int16x4_t, b: int16x8_t) -> int16x4_t { - static_assert_uimm_bits!(LANE, 3); - let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulh_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_lane_s16(a: int16x8_t, b: int16x4_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 2); - let b: int16x8_t = simd_shuffle!(b, b, 
[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_laneq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - let b: int16x8_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s16(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_lane_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 1); - let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); - vqrdmulh_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulh_laneq_s32(a: int32x2_t, b: int32x4_t) -> int32x2_t { - static_assert_uimm_bits!(LANE, 2); - let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]); - vqrdmulh_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_lane_s32(a: int32x4_t, b: int32x2_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 1); - let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + transmute(a) } -/// Vector rounding saturating doubling multiply high by scalar -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrdmulh, LANE = 1))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrdmulhq_laneq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vqrdmulhq_s32(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v8i8")] - fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vqrshl_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v16i8")] - fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vqrshlq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v4i16")] - fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqrshl_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16) +#[doc = 
"Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v8i16")] - fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqrshlq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v2i32")] - fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqrshl_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v4i32")] - fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vqrshlq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v1i64")] - fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vqrshl_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { + transmute(a) } -/// Signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"] 
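The `vreinterpret*` conversions in this hunk lower to no instruction at all (hence `assert_instr(nop)`): each one is a plain `transmute` that relabels the 64- or 128-bit register contents. A minimal usage sketch, assuming an AArch64 target with NEON; the helper name `poly16_low_bits` is hypothetical:

```rust
// Hypothetical helper: read the low u16 lane of a poly16x8_t by
// reinterpreting its bits as uint16x8_t. The cast is a bit-pattern
// transmute, so no data is converted and no instruction is emitted.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn poly16_low_bits(p: core::arch::aarch64::poly16x8_t) -> u16 {
    use core::arch::aarch64::*;
    let u: uint16x8_t = vreinterpretq_u16_p16(p);
    vgetq_lane_u16::<0>(u)
}
```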
+#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshl.v2i64")] - fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } -vqrshlq_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v8i8")] - fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } -vqrshl_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v16i8")] - fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } -vqrshlq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v4i16")] - fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } -vqrshl_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v8i16")] - fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } -vqrshlq_u16_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v2i32")] - fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } -vqrshl_u32_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v4i32")] - fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } -vqrshlq_u32_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v1i64")] - fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } -vqrshl_u64_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + transmute(a) } -/// Unsigned signed saturating rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqrshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshl.v2i64")] - fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } -vqrshlq_u64_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] - fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } -vqrshrn_n_s16_(a, const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshrn.v8i8")] - fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; - } -vqrshrn_n_s16_(a, N) -} - -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] - fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; - } -vqrshrn_n_s32_(a, const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshrn.v4i16")] - fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; - } -vqrshrn_n_s32_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn 
vreinterpretq_u64_p128(a: p128) -> uint64x2_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] - fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; - } -vqrshrn_n_s64_(a, const { int64x2_t([-N as i64, -N as i64]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + transmute(a) } -/// Signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshrn.v2i32")] - fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; - } -vqrshrn_n_s64_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + transmute(a) } -/// Unsigned signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
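// A minimal sketch (editorial, not part of the generated output): the
// `vreinterpret*` intrinsics in this hunk are pure bit reinterpretations, so
// the regenerated bodies are a plain `transmute` and the tests only assert a
// `nop`. Because the `poly64`/`p128` polynomial types are tied to the `aes`
// target feature, these now gate on `neon,aes` (and `v8` on 32-bit Arm)
// rather than `v7`; the stability split is unchanged, stable since 1.59.0
// off-Arm and tracked by issue #111800 on 32-bit Arm. A round trip through
// `p128` preserves the raw bytes:
//
//     unsafe {
//         let bytes: uint8x16_t = vdupq_n_u8(0xAB);       // 16 lanes of 0xAB
//         let wide: p128 = vreinterpretq_p128_u8(bytes);  // same 128 bits
//         let back: uint8x16_t = vreinterpretq_u8_p128(wide);
//         // `back` is bit-identical to `bytes`.
//     }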
#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] - fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } -vqrshrn_n_u16_(a, const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + transmute(a) } -/// Unsigned signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshrn.v8i8")] - fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; - } -vqrshrn_n_u16_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + transmute(a) } -/// Unsigned signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] - fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; - } -vqrshrn_n_u32_(a, const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + transmute(a) } -/// Unsigned signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshrn.v4i16")] - fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; - } -vqrshrn_n_u32_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + transmute(a) } -/// Unsigned signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] - fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; - } -vqrshrn_n_u64_(a, const { uint64x2_t([-N as u64, -N as u64]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + transmute(a) } -/// Unsigned signed saturating rounded shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqrshrn.v2i32")] - fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; - } -vqrshrn_n_u64_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { + transmute(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] - fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } -vqrshrun_n_s16_(a, const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + transmute(a) } -/// Signed 
saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshrun.v8i8")] - fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; - } -vqrshrun_n_s16_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + transmute(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] - fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; - } -vqrshrun_n_s32_(a, const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + transmute(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
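// A minimal sketch (editorial): the removed `vqrshrn_n_*`/`vqrshrun_n_*`
// bodies show the two lowering strategies for the const shift amount `N`.
// The 32-bit Arm path splats `-N` into a vector for the
// `llvm.arm.neon.vqrshiftn*` intrinsics, which treat negative amounts as
// right shifts; the AArch64 path passes `N` directly as the immediate of
// `sqrshrn`/`sqrshrun`/`uqrshrn`. In both cases the
// `static_assert!(N >= 1 && N <= ...)` bound mirrors the instruction's legal
// immediate range, 1 up to the source lane width. For example:
//
//     unsafe {
//         let a: int32x4_t = vdupq_n_s32(0x0001_2380);
//         // Saturating, rounding shift right by 8, narrowing to 16 bits.
//         let r: int16x4_t = vqrshrn_n_s32::<8>(a);
//     }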
-#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqrshrun.v4i16")] - fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; - } -vqrshrun_n_s32_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { + transmute(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] - fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; - } -vqrshrun_n_s64_(a, const { int64x2_t([-N as i64, -N as i64]) }) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + transmute(a) } -/// Signed saturating rounded shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), 
link_name = "llvm.aarch64.neon.sqrshrun.v2i32")] - fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; - } -vqrshrun_n_s64_(a, N) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v8i8")] - fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vqshl_s8_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] - 
#[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v16i8")] - fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vqshlq_s8_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v4i16")] - fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqshl_s16_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v8i16")] - fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqshlq_s16_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v2i32")] - fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqshl_s32_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) 
-> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v4i32")] - fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vqshlq_s32_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v1i64")] - fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vqshl_s64_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + transmute(a) } -/// Signed saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", 
issue = "111800"))] -pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshl.v2i64")] - fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } -vqshlq_s64_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v8i8")] - fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } -vqshl_u8_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v16i8")] - fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } -vqshlq_u8_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v4i16")] - fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } -vqshl_u16_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v8i16")] - fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } -vqshlq_u16_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v2i32")] - fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } -vqshl_u32_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] 
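// A minimal sketch (editorial): every removed body in this hunk bound its
// LLVM intrinsic the same way, declaring a foreign function in an
// `extern "unadjusted"` block and selecting the per-architecture symbol with
// `link_name`, roughly:
//
//     extern "unadjusted" {
//         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
//         #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"),
//                    link_name = "llvm.aarch64.neon.uqshl.v4i32")]
//         fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
//     }
//
// The regenerated code drops these declarations wherever the operation can be
// expressed directly, e.g. as `transmute` for the reinterpret casts.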
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v4i32")] - fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; - } -vqshlq_u32_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v1i64")] - fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; - } -vqshl_u64_(a, b) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshl.v2i64")]
-        fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-vqshlq_u64_(a, b)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_s8(a, vdup_n_s8(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_s8(a, vdupq_n_s8(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_s16(a, vdup_n_s16(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_s16(a, vdupq_n_s16(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_s32(a, vdup_n_s32(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+    transmute(a)
 }
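Editor's illustration, not part of the patch: a minimal sketch of the `vreinterpret_*_p64` family being added above, assuming the public `std::arch::aarch64` API on a Neon/AES-capable target (the demo function name is hypothetical). A reinterpret cast is a pure bit-cast, which is why the generated tests assert `nop`.

```rust
// Hypothetical demo: reinterpret a 64-bit polynomial vector as bytes.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,aes")]
unsafe fn reinterpret_demo() {
    use std::arch::aarch64::*;
    let p: poly64x1_t = vcreate_p64(0x0102_0304_0506_0708);
    // No instruction is emitted for this cast; only the type changes.
    let bytes: uint8x8_t = vreinterpret_u8_p64(p);
    // Little-endian lane order: lane 0 holds the least significant byte.
    assert_eq!(vget_lane_u8::<0>(bytes), 0x08);
}
```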
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_s32(a, vdupq_n_s32(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_s64(a, vdup_n_s64(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+    transmute(a)
 }
-/// Signed saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_s64(a, vdupq_n_s64(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+    transmute(a)
 }
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_u8(a, vdup_n_s8(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
+    transmute(a)
 }
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_u8(a, vdupq_n_s8(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+    transmute(a)
 }
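Editor's illustration, not part of the patch: the removed-and-regenerated `vqshl_n_*` wrappers take the shift amount as a const generic (`<const N: i32>`, validated by `static_assert_uimm_bits!`) and saturate instead of wrapping. A minimal sketch using the public `std::arch::aarch64` API; the demo function name is hypothetical.

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn qshl_demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_s8(100);
    // 100 << 2 = 400 overflows i8, so every lane saturates to i8::MAX (127).
    let r = vqshl_n_s8::<2>(a);
    assert_eq!(vget_lane_s8::<0>(r), 127);
}
```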
"neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - vqshl_u16(a, vdup_n_s16(N as _)) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert_uimm_bits!(N, 4); - vqshlq_u16(a, vdupq_n_s16(N as _)) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - vqshl_u32(a, vdup_n_s32(N as _)) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_uimm_bits!(N, 5); - vqshlq_u32(a, vdupq_n_s32(N as _)) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + transmute(a) } -/// Unsigned saturating shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64) +#[doc = "Vector reinterpret cast operation"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - vqshl_u64(a, vdup_n_s64(N as _)) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", 
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_u64(a, vdup_n_s64(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+    transmute(a)
 }
-/// Unsigned saturating shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_u64(a, vdupq_n_s64(N as _))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
+    transmute(a)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
-        fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")]
+        fn _vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-vqshlu_n_s8_(a, const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) })
+    _vrhadd_s8(a, b)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v8i8")]
-        fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srhadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")]
+        fn _vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-vqshlu_n_s8_(a, const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) })
+    _vrhaddq_s8(a, b)
 }
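Editor's illustration, not part of the patch: `vrhadd` computes `(a + b + 1) >> 1` per lane in a widened type, so the halved sum rounds up instead of truncating. A minimal sketch (public `std::arch::aarch64` API, hypothetical demo name).

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn rhadd_demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_s8(3);
    let b = vdup_n_s8(4);
    // Rounding halving add: (3 + 4 + 1) >> 1 == 4 rather than the truncated 3.
    let r = vrhadd_s8(a, b);
    assert_eq!(vget_lane_s8::<0>(r), 4);
}
```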
"llvm.arm.neon.vqshiftsu.v4i16")] - fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + fn _vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqshlu_n_s16_(a, const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }) + _vrhadd_s16(a, b) } -/// Signed saturating shift left unsigned -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16) +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlu_n_s16(a: int16x4_t) -> uint16x4_t { - static_assert_uimm_bits!(N, 4); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v4i16")] - fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v8i16" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + fn _vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqshlu_n_s16_(a, const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }) + _vrhaddq_s16(a, b) } -/// Signed saturating shift left unsigned -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32) +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { extern 
"unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] - fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v2i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + fn _vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqshlu_n_s32_(a, const { int32x2_t([N as i32, N as i32]) }) + _vrhadd_s32(a, b) } -/// Signed saturating shift left unsigned -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32) +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshlu_n_s32(a: int32x2_t) -> uint32x2_t { - static_assert_uimm_bits!(N, 5); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v2i32")] - fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srhadd.v4i32" + )] + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + fn _vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqshlu_n_s32_(a, const { int32x2_t([N as i32, N as i32]) }) + _vrhaddq_s32(a, b) } -/// Signed saturating shift left unsigned -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64) +#[doc = "Rounding halving add"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshlu, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshlu_n_s64(a: int64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urhadd) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t 
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
-        fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")]
+        fn _vrhadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-vqshlu_n_s64_(a, const { int64x1_t([N as i64]) })
+    _vrhadd_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v1i64")]
-        fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")]
+        fn _vrhaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-vqshlu_n_s64_(a, const { int64x1_t([N as i64]) })
+    _vrhaddq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
-        fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")]
+        fn _vrhadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-vqshluq_n_s8_(a, const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) })
+    _vrhadd_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v16i8")]
-        fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")]
+        fn _vrhaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-vqshluq_n_s8_(a, const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) })
+    _vrhaddq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
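Editor's illustration, not part of the patch: the unsigned variants pair naturally with the non-rounding `vhadd` intrinsics, which truncate the halved sum instead of rounding it up. A minimal sketch contrasting the two (public `std::arch::aarch64` API, hypothetical demo name).

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn halving_add_demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_u8(7);
    let b = vdup_n_u8(8);
    // vhadd truncates the halved sum, vrhadd rounds it up:
    assert_eq!(vget_lane_u8::<0>(vhadd_u8(a, b)), 7); // (7 + 8) >> 1
    assert_eq!(vget_lane_u8::<0>(vrhadd_u8(a, b)), 8); // (7 + 8 + 1) >> 1
}
```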
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
-        fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")]
+        fn _vrhadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-vqshluq_n_s16_(a, const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) })
+    _vrhadd_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhaddq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urhadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v8i16")]
-        fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urhadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")]
+        fn _vrhaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-vqshluq_n_s16_(a, const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) })
+    _vrhaddq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
-        fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
-    }
-vqshluq_n_s32_(a, const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) })
-}
-
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)
-#[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(frintn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v4i32")]
-        fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frintn.v2f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
+        fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
     }
-vqshluq_n_s32_(a, const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) })
+    _vrndn_f32(a)
 }
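Editor's illustration, not part of the patch: `vrndn`/FRINTN rounds to the nearest integral value with ties going to even, unlike round-half-away-from-zero. A minimal sketch (public `std::arch::aarch64` API, hypothetical demo name).

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn rndn_demo() {
    use std::arch::aarch64::*;
    // 2.5 is a tie and rounds down to the even value 2.0 ...
    assert_eq!(vget_lane_f32::<0>(vrndn_f32(vdup_n_f32(2.5))), 2.0);
    // ... while 3.5 rounds up to the even value 4.0.
    assert_eq!(vget_lane_f32::<0>(vrndn_f32(vdup_n_f32(3.5))), 4.0);
}
```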
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(frintn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
-        fn vqshluq_n_s64_(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frintn.v4f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
+        fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
     }
-vqshluq_n_s64_(a, const { int64x2_t([N as i64, N as i64]) })
+    _vrndnq_f32(a)
 }
-/// Signed saturating shift left unsigned
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshlu.v2i64")]
-        fn vqshluq_n_s64_(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v8i8"
+        )]
+        fn _vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-vqshluq_n_s64_(a, const { int64x2_t([N as i64, N as i64]) })
+    _vrshl_s8(a, b)
 }
-/// Signed saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
-        fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v16i8"
+        )]
+        fn _vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-vqshrn_n_s16_(a, const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) })
+    _vrshlq_s8(a, b)
 }
-/// Signed saturating shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrn.v8i8")]
-        fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.srshl.v4i16"
+        )]
+        fn _vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-vqshrn_n_s16_(a, N)
+    _vrshl_s16(a, b)
 }
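Editor's illustration, not part of the patch: in `vrshl` the per-lane shift operand is signed, so a positive count shifts left while a negative count performs a *rounding* shift right. A minimal sketch (public `std::arch::aarch64` API, hypothetical demo name).

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn rshl_demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_s16(5);
    // Shift by -1, i.e. a rounding shift right by 1:
    // (5 + 1) >> 1 == 3, where a plain (truncating) vshl would give 2.
    let r = vrshl_s16(a, vdup_n_s16(-1));
    assert_eq!(vget_lane_s16::<0>(r), 3);
}
```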
= "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v8i16" + )] + fn _vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqshrn_n_s32_(a, const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }) + _vrshlq_s16(a, b) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32) +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrn.v4i16")] - fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v2i32" + )] + fn _vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqshrn_n_s32_(a, N) + _vrshl_s32(a, b) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64) +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] - fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + 
link_name = "llvm.aarch64.neon.srshl.v4i32" + )] + fn _vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqshrn_n_s64_(a, const { int64x2_t([-N as i64, -N as i64]) }) + _vrshlq_s32(a, b) } -/// Signed saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64) +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrn.v2i32")] - fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.srshl.v1i64" + )] + fn _vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } -vqshrn_n_s64_(a, N) + _vrshl_s64(a, b) } -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16) +#[doc = "Signed rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] - fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
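Editor's illustration, not part of the patch: the `vqshrn_n_*` wrappers being removed in this region shift right by a const-generic amount and narrow to half the lane width with saturation. A minimal sketch (public `std::arch::aarch64` API, hypothetical demo name).

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn qshrn_demo() {
    use std::arch::aarch64::*;
    let wide = vdupq_n_s16(1000);
    // 1000 >> 2 = 250, which still exceeds i8::MAX, so the narrowed
    // result saturates to 127.
    let narrow: int8x8_t = vqshrn_n_s16::<2>(wide);
    assert_eq!(vget_lane_s8::<0>(narrow), 127);
}
```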
"llvm.aarch64.neon.srshl.v2i64" + )] + fn _vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; } -vqshrn_n_u16_(a, const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }) + _vrshlq_s64(a, b) } -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshrn.v8i8")] - fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i8" + )] + fn _vrshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vqshrn_n_u16_(a, N) + _vrshl_u8(a.as_signed(), b).as_unsigned() } -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] - fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] + #[cfg_attr( + any(target_arch = 
"aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v16i8" + )] + fn _vrshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vqshrn_n_u32_(a, const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }) + _vrshlq_u8(a.as_signed(), b).as_unsigned() } -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshrn.v4i16")] - fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i16" + )] + fn _vrshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqshrn_n_u32_(a, N) + _vrshl_u16(a.as_signed(), b).as_unsigned() } -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] - fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v8i16" + )] + fn _vrshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqshrn_n_u64_(a, const { uint64x2_t([-N as u64, -N as u64]) }) + _vrshlq_u16(a.as_signed(), b).as_unsigned() } -/// Unsigned saturating shift right narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshrn.v2i32")] - fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i32" + )] + fn _vrshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqshrn_n_u64_(a, N) + _vrshl_u32(a.as_signed(), b).as_unsigned() } -/// Signed saturating shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] - fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v4i32" + )] + fn _vrshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqshrun_n_s16_(a, const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }) + _vrshlq_u32(a.as_signed(), b).as_unsigned() } -/// Signed saturating shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - #[allow(improper_ctypes)] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrun.v8i8")] - fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v1i64" + )] + fn _vrshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t; } -vqshrun_n_s16_(a, N) + _vrshl_u64(a.as_signed(), b).as_unsigned() } -/// Signed saturating shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32) +#[doc = "Unsigned rounding shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(urshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", 
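+// The unsigned vrshl family above lowers to LLVM declarations that only
+// traffic in signed vector types; as_signed()/as_unsigned() reinterpret the
+// lanes without changing any bits, and URSHL still treats the data lanes as
+// unsigned. (Explanatory note, not generator output.)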
link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] - fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.urshl.v2i64" + )] + fn _vrshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t; } -vqshrun_n_s32_(a, const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }) + _vrshlq_u64(a.as_signed(), b).as_unsigned() } -/// Signed saturating shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32) +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(not(target_arch = "arm"))] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrun.v4i16")] - fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; - } -vqshrun_n_s32_(a, N) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + vrshl_s8(a, vdup_n_s8(-N as _)) } -/// Signed saturating shift right unsigned narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64) +#[doc = "Signed rounding shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srshr, N = 2) +)] #[rustc_legacy_const_generics(1)] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] - fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; - } -vqshrun_n_s64_(a, const { int64x2_t([-N as i64, -N as i64]) }) +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrshrq_n_s8(a: int8x16_t) 
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
-        fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
-    }
-vqshrun_n_s64_(a, const { int64x2_t([-N as i64, -N as i64]) })
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshlq_s8(a, vdupq_n_s8(-N as _))
 }
-/// Signed saturating shift right unsigned narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
 #[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqshrun.v2i32")]
-        fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
-    }
-vqshrun_n_s64_(a, N)
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshl_s16(a, vdup_n_s16(-N as _))
 }
-/// Reciprocal square-root estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frsqrte))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
-        fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t;
-    }
-vrsqrte_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshlq_s16(a, vdupq_n_s16(-N as _))
 }
-/// Reciprocal square-root estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frsqrte))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrte.v4f32")]
-        fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t;
-    }
-vrsqrteq_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshl_s32(a, vdup_n_s32(-N as _))
 }
-/// Unsigned reciprocal square root estimate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursqrte))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ursqrte.v2i32")]
-        fn vrsqrte_u32_(a: uint32x2_t) -> uint32x2_t;
-    }
-vrsqrte_u32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshlq_s32(a, vdupq_n_s32(-N as _))
 }
-/// Unsigned reciprocal square root estimate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursqrte))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ursqrte.v4i32")]
-        fn vrsqrteq_u32_(a: uint32x4_t) -> uint32x4_t;
-    }
-vrsqrteq_u32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshl_s64(a, vdup_n_s64(-N as _))
 }
-/// Floating-point reciprocal square root step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frsqrts))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrts.v2f32")]
-        fn vrsqrts_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-vrsqrts_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(srshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshlq_s64(a, vdupq_n_s64(-N as _))
 }
-/// Floating-point reciprocal square root step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frsqrts))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frsqrts.v4f32")]
-        fn vrsqrtsq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-vrsqrtsq_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshl_u8(a, vdup_n_s8(-N as _))
 }
-/// Reciprocal estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frecpe))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpe.v2f32")]
-        fn vrecpe_f32_(a: float32x2_t) -> float32x2_t;
-    }
-vrecpe_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    vrshlq_u8(a, vdupq_n_s8(-N as _))
 }
-/// Reciprocal estimate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frecpe))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecpe.v4f32")]
-        fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t;
-    }
-vrecpeq_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshl_u16(a, vdup_n_s16(-N as _))
 }
-/// Unsigned reciprocal estimate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urecpe))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urecpe.v2i32")]
-        fn vrecpe_u32_(a: uint32x2_t) -> uint32x2_t;
-    }
-vrecpe_u32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    vrshlq_u16(a, vdupq_n_s16(-N as _))
 }
-/// Unsigned reciprocal estimate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urecpe))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urecpe.v4i32")]
-        fn vrecpeq_u32_(a: uint32x4_t) -> uint32x4_t;
-    }
-vrecpeq_u32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshl_u32(a, vdup_n_s32(-N as _))
 }
-/// Floating-point reciprocal step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frecps))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecps.v2f32")]
-        fn vrecps_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-vrecps_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    vrshlq_u32(a, vdupq_n_s32(-N as _))
 }
-/// Floating-point reciprocal step
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(frecps))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.frecps.v4f32")]
-        fn vrecpsq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-vrecpsq_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshl_u64(a, vdup_n_s64(-N as _))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urshr, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    vrshlq_u64(a, vdupq_n_s64(-N as _))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
+        fn _vrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vrshrn_n_s16(
+        a,
+        const {
+            int16x8_t([
+                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
+                -N as i16,
+            ])
+        },
+    )
 }
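+// On the Arm (AArch32) side the vrshiftn builtin takes the shift amounts as a
+// vector rather than an immediate, so the generated code splats -N into a
+// const-evaluated vector once per monomorphization. (Explanatory note, not
+// generator output.)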
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
+        fn _vrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vrshrn_n_s32(
+        a,
+        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
+    )
+}
+
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
+        fn _vrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vrshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) })
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    transmute(a)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rshrn.v8i8"
+        )]
+        fn _vrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vrshrn_n_s16(a, N)
 }
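+// By contrast, the AArch64 rshrn builtin takes the immediate directly as an
+// i32, so no splat is needed. Illustrative use (not generator output):
+//     vrshrn_n_s16::<8>(vdupq_n_s16(256)); // (256 + 128) >> 8 == 1 per i8 lane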
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
-    transmute(a)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rshrn.v4i16"
+        )]
+        fn _vrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vrshrn_n_s32(a, N)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
-    transmute(a)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rshrn.v2i32"
+        )]
+        fn _vrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vrshrn_n_s64(a, N)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rshrn, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    transmute(vrshrn_n_s16::<N>(transmute(a)))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rshrn, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    transmute(vrshrn_n_s32::<N>(transmute(a)))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rshrn, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    transmute(vrshrn_n_s64::<N>(transmute(a)))
 }
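+// The unsigned narrowing shifts above reuse the signed implementation via
+// transmute: RSHRN keeps only the low half of each rounded lane, and for
+// 1 <= N <= half the lane width those bits are identical whether the wide
+// lanes are read as signed or unsigned. (Explanatory note, not generator
+// output.)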
transmute(vrshrn_n_s64::(transmute(a))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16) +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v2f32" + )] + fn _vrsqrte_f32(a: float32x2_t) -> float32x2_t; + } + _vrsqrte_f32(a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32) +#[doc = "Reciprocal square-root estimate."] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrte) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrte.v4f32" + )] + fn _vrsqrteq_f32(a: float32x4_t) -> float32x4_t; + } + _vrsqrteq_f32(a) } -/// Vector reinterpret cast operation -/// -/// [Arm's 
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)
+#[doc = "Unsigned reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ursqrte)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ursqrte.v2i32"
+        )]
+        fn _vrsqrte_u32(a: int32x2_t) -> int32x2_t;
+    }
+    _vrsqrte_u32(a.as_signed()).as_unsigned()
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)
+#[doc = "Unsigned reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ursqrte)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ursqrte.v4i32"
+        )]
+        fn _vrsqrteq_u32(a: int32x4_t) -> int32x4_t;
+    }
+    _vrsqrteq_u32(a.as_signed()).as_unsigned()
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)
square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v2f32" + )] + fn _vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + _vrsqrts_f32(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16) +#[doc = "Floating-point reciprocal square root step"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(frsqrts) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v4f32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.frsqrts.v4f32" + )] + fn _vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + _vrsqrtsq_f32(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vrshr_n_s8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vrshrq_n_s8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vrshr_n_s16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vrshrq_n_s16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vrshr_n_s32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vrshrq_n_s32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + simd_add(a, vrshr_n_s64::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32) +#[doc = "Signed rounding shift right and accumulate"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(srsra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + simd_add(a, vrshrq_n_s64::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vrshr_n_u8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] 
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vrshrq_n_u8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vrshr_n_u16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vrshrq_n_u16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vrshr_n_u32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ursra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vrshrq_n_u32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8) +#[doc = "Unsigned rounding shift right and accumulate"] +#[doc = "[Arm's 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ursra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    simd_add(a, vrshr_n_u64::<N>(b))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)
+#[doc = "Unsigned rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ursra, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    simd_add(a, vrshrq_n_u64::<N>(b))
 }
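// --- Editor's note: illustrative sketch, not part of this patch. ---
// Every vrsra_n_*/vrsraq_n_* above is a rounding right shift followed by an
// accumulate: per lane, a + ((b + (1 << (N - 1))) >> N). A hand-checked
// usage example, assuming a target with NEON available:
//
//     let acc = vdup_n_s8(100);
//     let b = vdup_n_s8(7);
//     // each lane: 100 + ((7 + 2) >> 2) == 102
//     let r = unsafe { vrsra_n_s8::<2>(acc, b) };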
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v8i8" + )] + fn _vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t; + } + _vrsubhn_s16(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16) +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.rsubhn.v4i16" + )] + fn _vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } + _vrsubhn_s32(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16) +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
-pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rsubhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rsubhn.v2i32"
+        )]
+        fn _vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+    _vrsubhn_s64(a, b)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rsubhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    transmute(vrsubhn_s16(transmute(a), transmute(b)))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rsubhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + transmute(vrsubhn_s32(transmute(a), transmute(b))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32) +#[doc = "Rounding subtract returning high narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(rsubhn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + transmute(vrsubhn_s64(transmute(a), transmute(b))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic 
unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
-pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub 
-pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(LANE, 4);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_insert!(b, LANE as u32, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)] +pub unsafe fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64)"] +#[doc = "## 
Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = 
"111800"))] -pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_uimm_bits!(LANE, 4); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_uimm_bits!(LANE, 2); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_uimm_bits!(LANE, 3); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { - transmute(a) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { + static_assert!(LANE == 0); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { + static_assert!(LANE == 0); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64) +#[doc = "Insert vector element from another vector 
element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { + static_assert!(LANE == 0); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16) +#[doc = "Insert vector element from another vector element"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { - transmute(a) +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { + static_assert_uimm_bits!(LANE, 1); + simd_insert!(b, LANE as u32, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), 
assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_uimm_bits!(N, 3); + simd_shl(a, vdup_n_s8(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_uimm_bits!(N, 3); + simd_shl(a, vdupq_n_s8(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + 
target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_uimm_bits!(N, 4); + simd_shl(a, vdup_n_s16(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_uimm_bits!(N, 4); + simd_shl(a, vdupq_n_s16(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_uimm_bits!(N, 5); + simd_shl(a, vdup_n_s32(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] 
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_uimm_bits!(N, 5); + simd_shl(a, vdupq_n_s32(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_uimm_bits!(N, 6); + simd_shl(a, vdup_n_s64(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_uimm_bits!(N, 6); + simd_shl(a, vdupq_n_s64(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_uimm_bits!(N, 3); + simd_shl(a, vdup_n_u8(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_uimm_bits!(N, 3); + simd_shl(a, vdupq_n_u8(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_uimm_bits!(N, 4); + simd_shl(a, vdup_n_u16(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_uimm_bits!(N, 4); + simd_shl(a, vdupq_n_u16(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_uimm_bits!(N, 5); + simd_shl(a, vdup_n_u32(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_uimm_bits!(N, 5); + simd_shl(a, vdupq_n_u32(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
vreinterpretq_u64_p128(a: p128) -> uint64x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_uimm_bits!(N, 6); + simd_shl(a, vdup_n_u64(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128) +#[doc = "Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shl, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_uimm_bits!(N, 6); + simd_shl(a, vdupq_n_u64(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i8" + )] + fn _vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } + _vshl_s8(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v16i8" + )] + fn _vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vshlq_s8(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] + #[cfg_attr( + 
any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i16" + )] + fn _vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vshl_s16(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v8i16" + )] + fn _vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vshlq_s16(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v2i32" + )] + fn _vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vshl_s32(a, b) } -/// Vector reinterpret cast 
operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v4i32" + )] + fn _vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vshlq_s32(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32) +#[doc = "Signed Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.sshl.v1i64" + )] + fn _vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + _vshl_s64(a, b) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32) +#[doc = "Signed Shift left"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sshl.v2i64"
+        )]
+        fn _vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vshlq_s64(a, b)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ushl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ushl.v8i8"
+        )]
+        fn _vshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vshl_u8(a.as_signed(), b).as_unsigned()
 }
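// A usage sketch, not generated code: unlike the _n variants, the
// register-operand vshl family takes per-lane shift counts from a second,
// always-signed vector, and a negative count shifts right (SSHL/USHL
// semantics). Assumes an aarch64 target.
#[cfg(target_arch = "aarch64")]
unsafe fn shl_reg_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_u8(16); // uint8x8_t with every lane == 16
    let b = vdup_n_s8(-2); // negative count: logical shift right by 2
    let r = vshl_u8(a, b);
    assert_eq!(vget_lane_u8::<0>(r), 4); // 16 >> 2 == 4
}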
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v16i8" + )] + fn _vshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } + _vshlq_u8(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8) +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v4i16" + )] + fn _vshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } + _vshl_u16(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16) +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v8i16" + )] + fn _vshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } + _vshlq_u16(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16) +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v2i32" + )] + fn _vshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } + _vshl_u32(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16) +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v4i32" + )] + fn _vshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } + _vshlq_u32(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32) +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v1i64" + )] + fn _vshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } + _vshl_u64(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32) +#[doc = "Unsigned Shift left"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_u32(a: 
uint32x4_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushl) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.ushl.v2i64" + )] + fn _vshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } + _vshlq_u64(a.as_signed(), b).as_unsigned() } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8) +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshll, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshll_n_s16(a: int16x4_t) -> int32x4_t { + static_assert!(N >= 0 && N <= 16); + simd_shl(simd_cast(a), vdupq_n_s32(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8) +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshll, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since 
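[Editor's note] The vshl_u*/vshlq_u* additions above all follow one pattern: the unsigned register shift lowers to LLVM's ushl, and the unsigned vectors pass through as_signed()/as_unsigned() only because the extern declaration is typed on signed vectors. A minimal usage sketch (illustrative only, not part of the diff; assumes an AArch64 target and the stable core::arch::aarch64 API):

#[cfg(target_arch = "aarch64")]
fn vshl_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_u8(4); // all lanes = 4
        let b = vdup_n_s8(-1); // shift counts are signed; negative shifts right
        let r = vshl_u8(a, b); // every lane: 4 >> 1 == 2
        assert_eq!(vget_lane_u8::<0>(r), 2);
    }
}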
= "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshll_n_s32(a: int32x2_t) -> int64x2_t { + static_assert!(N >= 0 && N <= 32); + simd_shl(simd_cast(a), vdupq_n_s64(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8) +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshll, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshll_n_s8(a: int8x8_t) -> int16x8_t { + static_assert!(N >= 0 && N <= 8); + simd_shl(simd_cast(a), vdupq_n_s16(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16) +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushll, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { + static_assert!(N >= 0 && N <= 16); + simd_shl(simd_cast(a), vdupq_n_u32(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16) +#[doc = "Signed shift left long"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushll, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { + static_assert!(N >= 0 && N <= 32); + simd_shl(simd_cast(a), vdupq_n_u64(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16) +#[doc = "Signed shift left long"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushll, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { + static_assert!(N >= 0 && N <= 8); + simd_shl(simd_cast(a), vdupq_n_u16(N as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
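[Editor's note] The vshll_n_* widening shifts take the shift amount as a const generic N, checked at compile time by static_assert! (0 through the element width); each lane is widened with simd_cast first and shifted afterwards. A usage sketch (illustrative only, not part of the diff; assumes an AArch64 target):

#[cfg(target_arch = "aarch64")]
fn vshll_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_u8(3); // u8 lanes widen to u16 before the shift
        let r = vshll_n_u8::<4>(a); // every lane: 3u16 << 4 == 48
        assert_eq!(vgetq_lane_u16::<0>(r), 48);
    }
}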
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + simd_shr(a, vdup_n_s8(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + simd_shr(a, vdupq_n_s8(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + simd_shr(a, vdup_n_s16(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + simd_shr(a, vdupq_n_s16(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + simd_shr(a, vdup_n_s32(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16) +#[doc = "Shift right"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + simd_shr(a, vdupq_n_s32(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + simd_shr(a, vdup_n_s64(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sshr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + simd_shr(a, vdupq_n_s64(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { + return vdup_n_u8(0); + } else { + N + }; + simd_shr(a, vdup_n_u8(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + let n: i32 = if N == 8 { + return vdupq_n_u8(0); + } else { + N + }; + simd_shr(a, vdupq_n_u8(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { + return vdup_n_u16(0); + } else { + N + }; + simd_shr(a, vdup_n_u16(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + let n: i32 = if N == 16 { + return vdupq_n_u16(0); + } else { + N + }; + simd_shr(a, vdupq_n_u16(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { + return vdup_n_u32(0); + } else { + N + }; + simd_shr(a, vdup_n_u32(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + let n: i32 = if N == 32 { + return vdupq_n_u32(0); + } else { + N + }; + simd_shr(a, vdupq_n_u32(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { + return vdup_n_u64(0); + } else { + N + }; + simd_shr(a, vdup_n_u64(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8) +#[doc = "Shift right"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { - transmute(a) -} - -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { - transmute(a) -} - -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ushr, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N >= 1 && N <= 64); + let n: i32 = if N == 64 { + return vdupq_n_u64(0); + } else { + N + }; + simd_shr(a, vdupq_n_u64(n as _)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
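[Editor's note] The vshr_n_* bodies above special-case N equal to the element width, which the sshr/ushr instructions permit but LLVM's generic simd_shr does not: the signed forms clamp to width-1 (an arithmetic shift leaving only the sign), while the unsigned forms return an all-zero vector outright. Sketch (illustrative only, not part of the diff; assumes an AArch64 target):

#[cfg(target_arch = "aarch64")]
fn vshr_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let s = vdup_n_s8(-128);
        assert_eq!(vget_lane_s8::<0>(vshr_n_s8::<8>(s)), -1); // sign fill
        let u = vdup_n_u8(0x80);
        assert_eq!(vget_lane_u8::<0>(vshr_n_u8::<8>(u)), 0); // zero fill
    }
}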
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_s16(N as _))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_s32(N as _))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] 
+#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_s64(N as _))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_u16(N as _))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_u32(N as _))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64) +#[doc = "Shift right narrow"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(shrn, N = 2) +)] +#[rustc_legacy_const_generics(1)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_u64(N as _))) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vshr_n_s8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = 
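[Editor's note] vshrn_n_* narrows after the shift by plain truncation (simd_cast to the half-width lane type), not by saturation; high bits that survive the shift are simply discarded. Sketch (illustrative only, not part of the diff; assumes an AArch64 target):

#[cfg(target_arch = "aarch64")]
fn vshrn_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_s16(0x0412);
        let r = vshrn_n_s16::<4>(a); // 0x0412 >> 4 == 0x41, then truncate to i8
        assert_eq!(vget_lane_s8::<0>(r), 0x41);
    }
}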
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vshrq_n_s8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vshr_n_s16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vshrq_n_s16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vshr_n_s32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vshrq_n_s32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N >= 1 && N <= 64); + simd_add(a, vshr_n_s64::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64) +#[doc = "Signed shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N >= 1 && N <= 64); + simd_add(a, vshrq_n_s64::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> 
uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vshr_n_u8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N >= 1 && N <= 8); + simd_add(a, vshrq_n_u8::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + 
static_assert!(N >= 1 && N <= 16); + simd_add(a, vshr_n_u16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N >= 1 && N <= 16); + simd_add(a, vshrq_n_u16::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vshr_n_u32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N >= 1 && N <= 32); + simd_add(a, vshrq_n_u32::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usra, N = 2) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N >= 1 && N <= 64); + simd_add(a, vshr_n_u64::(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64) +#[doc = "Unsigned shift right and accumulate"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst1))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v2f32")]
+        fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t);
+    }
+    _vst1_f32_x2(a, b.0, b.1)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst1))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v4f32")]
+        fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t);
+    }
+    _vst1q_f32_x2(a, b.0, b.1)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
-    transmute(a)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v2f32.p0f32"
+        )]
+        fn _vst1_f32_x2(a: float32x2_t, b: float32x2_t, ptr: *mut f32);
+    }
+    _vst1_f32_x2(b.0, b.1, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
-    transmute(a)
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v4f32.p0f32"
+        )]
+        fn _vst1q_f32_x2(a: float32x4_t, b: float32x4_t, ptr: *mut f32);
+    }
+    _vst1q_f32_x2(b.0, b.1, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst1))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v2f32")]
+        fn _vst1_f32_x3(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t);
+    }
+    _vst1_f32_x3(a, b.0, b.1, b.2)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst1))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v4f32")]
+        fn _vst1q_f32_x3(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t);
+    }
+    _vst1q_f32_x3(a, b.0, b.1, b.2)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
-    transmute(a)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v2f32.p0f32"
+        )]
+        fn _vst1_f32_x3(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32);
+    }
+    _vst1_f32_x3(b.0, b.1, b.2, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
-    transmute(a)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v4f32.p0f32"
+        )]
+        fn _vst1q_f32_x3(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32);
+    }
+    _vst1q_f32_x3(b.0, b.1, b.2, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v2f32")]
+        fn _vst1_f32_x4(
+            ptr: *mut f32,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+        );
+    }
+    _vst1_f32_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v4f32")]
+        fn _vst1q_f32_x4(
+            ptr: *mut f32,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+        );
+    }
+    _vst1q_f32_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
-    transmute(a)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v2f32.p0f32"
+        )]
+        fn _vst1_f32_x4(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            ptr: *mut f32,
+        );
+    }
+    _vst1_f32_x4(b.0, b.1, b.2, b.3, a)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
-    transmute(a)
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(st1))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v4f32.p0f32"
+        )]
+        fn _vst1q_f32_x4(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            ptr: *mut f32,
+        );
+    }
+    _vst1q_f32_x4(b.0, b.1, b.2, b.3, a)
 }
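The `vst1*_f32_x2/_x3/_x4` pairs above store two, three, or four NEON registers to consecutive memory. Note from the hunks that the Arm LLVM intrinsics (`llvm.arm.neon.vst1xN`) take the pointer first while the AArch64 ones (`llvm.aarch64.neon.st1xN`) take it last, which is why the two wrappers forward their arguments in opposite orders. A minimal usage sketch (illustrative values, not from this patch; assumes an AArch64 target):

// Hypothetical demo function, not part of the generated file.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vst1x2_demo() {
    use core::arch::aarch64::{float32x2x2_t, vdup_n_f32, vst1_f32_x2};
    let mut out = [0.0f32; 4];
    let regs = float32x2x2_t(vdup_n_f32(1.0), vdup_n_f32(2.0));
    // Both registers are stored contiguously through the pointer.
    vst1_f32_x2(out.as_mut_ptr(), regs);
    assert_eq!(out, [1.0, 1.0, 2.0, 2.0]);
}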
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
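The `vst1*_lane_*` intrinsics above compile down to a plain lane extract plus scalar store (hence `assert_instr(nop, LANE = 0)`), with `static_assert_uimm_bits!` bounding `LANE` to the vector's lane count at compile time. A minimal usage sketch (illustrative values, not from this patch; assumes an AArch64 target):

// Hypothetical demo function, not part of the generated file.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vst1_lane_demo() {
    use core::arch::aarch64::{int32x2_t, vld1_s32, vst1_lane_s32};
    let v: int32x2_t = vld1_s32([10, 20].as_ptr());
    let mut out = 0i32;
    // LANE selects the element; here lane 1 (== 20) is written through the pointer.
    vst1_lane_s32::<1>(&mut out, v);
    assert_eq!(out, 20);
}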
"111800") +)] +pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u8(a: *mut u8, b: uint8x16_t) { + static_assert_uimm_bits!(LANE, 4); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u16(a: *mut u16, b: uint16x8_t) { + static_assert_uimm_bits!(LANE, 3); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { + static_assert_uimm_bits!(LANE, 1); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_lane_u32(a: *mut u32, b: uint32x4_t) { + static_assert_uimm_bits!(LANE, 2); + *a = simd_extract!(b, LANE as u32); } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { - transmute(a) -} - -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16) 
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
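`static_assert_uimm_bits!(LANE, N)` in the bodies above is the crate-internal compile-time check that the immediate fits in N unsigned bits, i.e. that it names one of the vector's lanes. A hypothetical reduction of the property it enforces (not the real macro, which lives in stdarch's internals):

```
// Hypothetical sketch of the check static_assert_uimm_bits! performs.
const fn uimm_fits(value: u32, bits: u32) -> bool {
    value < (1u32 << bits)
}
const _: () = assert!(uimm_fits(3, 2)); // lane 3 of a 4-lane vector: ok
// const _: () = assert!(uimm_fits(4, 2)); // lane 4 of 4: compile error
```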
-pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
-    transmute(a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)
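The 64-bit d-register variants hold exactly one lane, so the lane check collapses to `static_assert!(LANE == 0)`, and the `p64` form is additionally gated on the `aes` feature (`v8` rather than `v7` on Arm). A sketch with assumed values:

```
// Sketch only; `out` is assumed to point at one writable i64.
#[cfg(target_arch = "aarch64")]
unsafe fn store_single_lane(out: *mut i64) {
    use core::arch::aarch64::{vdup_n_s64, vst1_lane_s64};
    let v = vdup_n_s64(-1);
    // int64x1_t has a single lane; any LANE other than 0 fails to compile.
    vst1_lane_s64::<0>(out, v);
}
```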
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) {
+    vst1_s64_x2(transmute(a), transmute(b))
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"),
assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { - transmute(a) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { - transmute(a) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { - transmute(a) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x2(a: *mut p64, 
b: poly64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { - transmute(a) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon,aes")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { - transmute(a) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = 
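The `vst1*_p64_x2/_x3/_x4` wrappers above share their bit patterns with the s64 versions, so they only `transmute` the pointer and tuple and forward to them; the emitted instruction (`vst1`/`st1`) is identical. A round-trip sketch, assuming an aarch64 target with the `aes` feature available at runtime, `dst` pointing at two initialized 64-bit elements, and the `vld1_p64_x2` load counterpart:

```
// Sketch: reload two poly64x1_t values and store them back; the store
// lowers to the same ST1 that vst1_s64_x2 would emit.
#[cfg(target_arch = "aarch64")]
unsafe fn poly_pair_roundtrip(dst: *mut u64) {
    use core::arch::aarch64::{p64, vld1_p64_x2, vst1_p64_x2};
    let pair = vld1_p64_x2(dst as *const p64);
    vst1_p64_x2(dst as *mut p64, pair);
}
```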
"arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8i8.p0i8" + )] + fn _vst1_s8_x2(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v16i8.p0i8" + )] + fn _vst1q_s8_x2(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4i16.p0i16" + )] + fn 
_vst1_s16_x2(a: int16x4_t, b: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v8i16.p0i16" + )] + fn _vst1q_s16_x2(a: int16x8_t, b: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2i32.p0i32" + )] + fn _vst1_s32_x2(a: int32x2_t, b: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v4i32.p0i32" + )] + fn _vst1q_s32_x2(a: int32x4_t, b: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v1i64.p0i64" + )] + fn _vst1_s64_x2(a: int64x1_t, b: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x2.v2i64.p0i64" + )] + fn _vst1q_s64_x2(a: 
int64x2_t, b: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x2(b.0, b.1, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v8i8")] + fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t); + } + _vst1_s8_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v16i8")] + fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t); + } + _vst1q_s8_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = 
"aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v4i16")] + fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t); + } + _vst1_s16_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v8i16")] + fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t); + } + _vst1q_s16_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v2i32")] + fn _vst1_s32_x2(ptr: *mut 
i32, a: int32x2_t, b: int32x2_t); + } + _vst1_s32_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v4i32")] + fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t); + } + _vst1q_s32_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v1i64")] + fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t); + } + _vst1_s64_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, 
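Note the argument-order difference between the two ABIs in this stretch: the AArch64 `llvm.aarch64.neon.st1x2.*` intrinsics take the destination pointer last, while the Arm `llvm.arm.neon.vst1x2.*` intrinsics take it first, which is why the shims pass `(b.0, b.1, a)` on one side and `(a, b.0, b.1)` on the other. Either way the tuple is stored contiguously; a usage sketch, assuming `dst` points at eight writable i32s:

```
// Sketch: store a pair of q registers back to back.
#[cfg(target_arch = "aarch64")]
unsafe fn store_two_q_registers(dst: *mut i32) {
    use core::arch::aarch64::{int32x4x2_t, vdupq_n_s32, vst1q_s32_x2};
    let b = int32x4x2_t(vdupq_n_s32(1), vdupq_n_s32(2));
    // Writes b.0 to dst[0..4], then b.1 to dst[4..8].
    vst1q_s32_x2(dst, b);
}
```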
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v2i64")] + fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t); + } + _vst1q_s64_x2(a, b.0, b.1) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i8.p0i8" + )] + fn _vst1_s8_x3(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = 
"arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v16i8.p0i8" + )] + fn _vst1q_s8_x3(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i16.p0i16" + )] + fn _vst1_s16_x3(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); + } + _vst1_s16_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v8i16.p0i16" + )] + fn _vst1q_s16_x3(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i32.p0i32" + )] + fn _vst1_s32_x3(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v4i32.p0i32" + )] + fn _vst1q_s32_x3(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { - transmute(a) +#[target_feature(enable = 
"neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v1i64.p0i64" + )] + fn _vst1_s64_x3(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x3.v2i64.p0i64" + )] + fn _vst1q_s64_x3(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); + } + _vst1q_s64_x3(b.0, b.1, b.2, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v8i8")] + fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); + } + _vst1_s8_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8) +#[doc = "Store multiple single-element structures from one, two, three, 
or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v16i8")] + fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); + } + _vst1q_s8_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v4i16")] + fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + } + _vst1_s16_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v8i16")] + fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); + } + _vst1q_s16_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v2i32")] + fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + } + _vst1_s32_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v4i32")] + fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + } + _vst1q_s32_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v1i64")] + fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + } + _vst1_s64_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { - transmute(a) +#[target_feature(enable = "neon,v7")] +#[cfg(target_arch = "arm")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v2i64")] + fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); + } + _vst1q_s64_x3(a, b.0, b.1, b.2) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = 
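// --- Editor's aside (illustrative, not part of this patch) ------------------
// Note the argument-order difference between the two backends bound above:
// the aarch64 `llvm.aarch64.neon.st1x3.*` intrinsics take the pointer last,
// while the 32-bit Arm `llvm.arm.neon.vst1x3.*` intrinsics take it first.
// Callers never see that; a bounds-checked safe wrapper looks the same either
// way. A minimal sketch for aarch64 (helper name is hypothetical):
#[cfg(target_arch = "aarch64")]
fn store_i64_x3_checked(out: &mut [i64], v: core::arch::aarch64::int64x1x3_t) {
    use core::arch::aarch64::vst1_s64_x3;
    assert!(out.len() >= 3, "need space for three i64 values");
    // SAFETY: bounds checked above; `out` is valid for 3 contiguous i64 writes.
    unsafe { vst1_s64_x3(out.as_mut_ptr(), v) }
}
// ----------------------------------------------------------------------------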
"arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i8.p0i8" + )] + fn _vst1_s8_x4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } + _vst1_s8_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v16i8.p0i8" + )] + fn _vst1q_s8_x4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } + _vst1q_s8_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i16.p0i16" + )] + fn _vst1_s16_x4(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, 
ptr: *mut i16); + } + _vst1_s16_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v8i16.p0i16" + )] + fn _vst1q_s16_x4(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + } + _vst1q_s16_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v2i32.p0i32" + )] + fn _vst1_s32_x4(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + } + _vst1_s32_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v4i32.p0i32" + )] + fn _vst1q_s32_x4(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + } + _vst1q_s32_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st1x4.v1i64.p0i64" + )] + fn _vst1_s64_x4(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + } + _vst1_s64_x4(b.0, b.1, b.2, b.3, a) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8) +#[doc = "Store multiple single-element structures from one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg(not(target_arch = "arm"))] 
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v8i8")]
+        fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t);
+    }
+    _vst1_s8_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v16i8")]
+        fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t);
+    }
+    _vst1q_s8_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v4i16")]
+        fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t);
+    }
+    _vst1_s16_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v8i16")]
+        fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t);
+    }
+    _vst1q_s16_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v2i32")]
+        fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t);
+    }
+    _vst1_s32_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v4i32")]
+        fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t);
+    }
+    _vst1q_s32_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v1i64")]
+        fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t);
+    }
+    _vst1_s64_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst1))]
+pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v2i64")]
+        fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t);
+    }
+    _vst1q_s64_x4(a, b.0, b.1, b.2, b.3)
 }
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
-    transmute(a)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) {
+    vst1_s8_x2(transmute(a), transmute(b))
 }
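// --- Editor's aside (illustrative, not part of this patch) ------------------
// The unsigned stores from here on are thin wrappers: they reinterpret the
// destination pointer and the register tuple and defer to the signed
// implementation, which is sound because the signed and unsigned tuple types
// share an identical lane layout. A caller-side sketch for aarch64 (helper
// name is hypothetical):
#[cfg(target_arch = "aarch64")]
fn store_two_u8_vectors() {
    use core::arch::aarch64::*;
    let mut out = [0u8; 16];
    // SAFETY: `out` is valid for 16 contiguous u8 writes.
    unsafe {
        let v = vdup_n_u8(0xAB);
        vst1_u8_x2(out.as_mut_ptr(), uint8x8x2_t(v, v));
    }
    assert!(out.iter().all(|&b| b == 0xAB));
}
// ----------------------------------------------------------------------------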
"v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { - transmute(a) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four 
registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) } -/// 
Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { + vst1_s32_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, 
any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { + vst1_s32_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { + vst1_s32_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
+#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { + vst1q_s32_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { + vst1q_s32_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { + vst1q_s32_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), 
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) {
+    vst1_s64_x4(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) {
+    vst1q_s64_x2(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) {
+    vst1q_s64_x3(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) {
+    vst1q_s64_x4(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) {
+    vst1_s8_x2(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
"arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8) +#[doc = "Store multiple single-element structures to one, two, three, or four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st1) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) } 
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) {
+    vst1q_s8_x3(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) {
+    vst1q_s8_x4(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) {
+    vst1_s16_x2(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) {
+    vst1_s16_x3(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) {
+    vst1_s16_x4(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) {
+    vst1q_s16_x2(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) {
+    vst1q_s16_x3(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
+    vst1q_s16_x4(transmute(a), transmute(b))
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
-    transmute(a)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
}
-/// Vector reinterpret cast operation
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v2f32.p0i8" + )] + fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8); + } + _vst2_f32(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4f32.p0i8" + )] + fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8); + } + _vst2q_f32(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8i8.p0i8" + )] + fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } + _vst2_s8(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16) +#[doc = "Store multiple 2-element structures from two 
registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v16i8.p0i8" + )] + fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } + _vst2q_s8(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4i16.p0i8" + )] + fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8); + } + _vst2_s16(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v8i16.p0i8" + )] + fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8); + } + _vst2q_s16(b.0, b.1, a as _) } -/// Vector reinterpret cast operation -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { - transmute(a) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v2i32.p0i8" + )] + fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8); + } + _vst2_s32(b.0, b.1, a as _) } -/// Signed rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v8i8")] - fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2.v4i32.p0i8" + )] + fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8); } -vrshl_s8_(a, b) + _vst2q_s32(b.0, b.1, a as _) } -/// Signed rounding shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s8) +#[doc = "Store 
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v16i8")]
-        fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2f32")]
+        fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32);
    }
-vrshlq_s8_(a, b)
+    _vst2_f32(a as _, b.0, b.1, 4)
}
-/// Signed rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v4i16")]
-        fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4f32")]
+        fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32);
    }
-vrshl_s16_(a, b)
+    _vst2q_f32(a as _, b.0, b.1, 4)
}
-/// Signed rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v8i16")]
-        fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i8")]
+        fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32);
    }
-vrshlq_s16_(a, b)
+    _vst2_s8(a as _, b.0, b.1, 1)
}
-/// Signed rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v2i32")]
-        fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v16i8")]
+        fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32);
    }
-vrshl_s32_(a, b)
+    _vst2q_s8(a as _, b.0, b.1, 1)
}
-/// Signed rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v4i32")]
-        fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i16")]
+        fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32);
    }
-vrshlq_s32_(a, b)
+    _vst2_s16(a as _, b.0, b.1, 2)
}
-/// Signed rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_s64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v1i64")]
-        fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i16")]
+        fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32);
    }
-vrshl_s64_(a, b)
+    _vst2q_s16(a as _, b.0, b.1, 2)
}
-/// Signed rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_s64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.srshl.v2i64")]
-        fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2i32")]
+        fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32);
    }
-vrshlq_s64_(a, b)
+    _vst2_s32(a as _, b.0, b.1, 4)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u8)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst2))]
+pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v8i8")]
-        fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i32")]
+        fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32);
    }
-vrshl_u8_(a, b)
+    _vst2q_s32(a as _, b.0, b.1, 4)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u8)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v16i8")]
-        fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v2f32.p0i8"
+        )]
+        fn _vst2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8);
    }
-vrshlq_u8_(a, b)
+    _vst2_lane_f32(b.0, b.1, LANE as i64, a as _)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v4i16")]
-        fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v4f32.p0i8"
+        )]
+        fn _vst2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8);
    }
-vrshl_u16_(a, b)
+    _vst2q_lane_f32(b.0, b.1, LANE as i64, a as _)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v8i16")]
-        fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v8i8.p0i8"
+        )]
+        fn _vst2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8);
    }
-vrshlq_u16_(a, b)
+    _vst2_lane_s8(b.0, b.1, LANE as i64, a as _)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v2i32")]
-        fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v4i16.p0i8"
+        )]
+        fn _vst2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8);
    }
-vrshl_u32_(a, b)
+    _vst2_lane_s16(b.0, b.1, LANE as i64, a as _)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v4i32")]
-        fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v8i16.p0i8"
+        )]
+        fn _vst2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8);
    }
-vrshlq_u32_(a, b)
+    _vst2q_lane_s16(b.0, b.1, LANE as i64, a as _)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshl_u64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
+#[cfg(not(target_arch = "arm"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v1i64")]
-        fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v2i32.p0i8"
+        )]
+        fn _vst2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8);
    }
-vrshl_u64_(a, b)
+    _vst2_lane_s32(b.0, b.1, LANE as i64, a as _)
}
-/// Unsigned rounding shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshlq_u64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
#[inline]
#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
#[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s32(a: *mut i32, b: int32x4x2_t) { + static_assert_uimm_bits!(LANE, 2); extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.urshl.v2i64")] - fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st2lane.v4i32.p0i8" + )] + fn _vst2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); } -vrshlq_u64_(a, b) + _vst2q_lane_s32(b.0, b.1, LANE as i64, a as _) } -/// Signed rounding shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - vrshl_s8(a, vdup_n_s8(-N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst2_lane_f32(a: *mut f32, b: float32x2x2_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2f32")] + fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + } + _vst2_f32(a as _, b.0, b.1, LANE, 4) } -/// Signed rounding shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8) +#[doc = "Store multiple 2-element structures from two registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - vrshlq_s8(a, vdupq_n_s8(-N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] 
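// A minimal usage sketch of the lane-wise store intrinsics above (hypothetical
// example, not part of the generated file), assuming an AArch64 target and the
// public `core::arch::aarch64` API:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         // Two source registers, filled with 7 and 9 respectively.
//         let pair = int16x4x2_t(vdup_n_s16(7), vdup_n_s16(9));
//         let mut out = [0i16; 2];
//         // Store lane 1 of each register as one interleaved 2-element structure.
//         vst2_lane_s16::<1>(out.as_mut_ptr(), pair);
//         assert_eq!(out, [7, 9]);
//     }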
-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s8)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshlq_s8(a, vdupq_n_s8(-N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4f32")]
+        fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32);
+    }
+    _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4)
 }

-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshl_s16(a, vdup_n_s16(-N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i8")]
+        fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32);
+    }
+    _vst2_lane_s8(a as _, b.0, b.1, LANE, 1)
 }

-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshlq_s16(a, vdupq_n_s16(-N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i16")]
+        fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32);
+    }
+    _vst2_lane_s16(a as _, b.0, b.1, LANE, 2)
 }

-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshl_s32(a, vdup_n_s32(-N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i16")]
+        fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32);
+    }
+    _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2)
 }

-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshlq_s32(a, vdupq_n_s32(-N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2i32")]
+        fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32);
+    }
+    _vst2_lane_s32(a as _, b.0, b.1, LANE, 4)
+}
+
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i32")]
+        fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32);
+    }
+    _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4)
 }

-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_s64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshl_s64(a, vdup_n_s64(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
 }

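// The unsigned and poly variants in this file are thin wrappers that forward to
// the signed implementation via `transmute`; a sketch of calling one such wrapper
// (hypothetical values, assuming an AArch64 target):
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let pair = uint8x8x2_t(vdup_n_u8(1), vdup_n_u8(2));
//         let mut out = [0u8; 2];
//         vst2_lane_u8::<0>(out.as_mut_ptr(), pair); // forwards to vst2_lane_s8::<0>
//         assert_eq!(out, [1, 2]);
//     }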
-/// Signed rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_s64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshlq_s64(a, vdupq_n_s64(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst2_lane_s16::<LANE>(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u8)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshl_u8(a, vdup_n_s8(-N as _))
-}
-
-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u8)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vrshlq_u8(a, vdupq_n_s8(-N as _))
-}
-
-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u16)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshl_u16(a, vdup_n_s16(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2q_lane_s16::<LANE>(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vrshlq_u16(a, vdupq_n_s16(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst2_lane_s32::<LANE>(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshl_u32(a, vdup_n_s32(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst2q_lane_s32::<LANE>(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vrshlq_u32(a, vdupq_n_s32(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2_lane_s8::<LANE>(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshr_n_u64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshl_u64(a, vdup_n_s64(-N as _))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x2_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    vst2_lane_s16::<LANE>(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrq_n_u64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    vrshlq_u64(a, vdupq_n_s64(-N as _))
-}
-
-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
-        fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-vrshrn_n_s16_(a, const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) })
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x2_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    vst2q_lane_s16::<LANE>(transmute(a), transmute(b))
 }

-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rshrn.v8i8")]
-        fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-vrshrn_n_s16_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
+    vst2_s64(transmute(a), transmute(b))
 }

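// `vst2_s64`, `vst2_u64` and `vst2_p64` operate on one-lane vectors, so the
// "interleaved" store degenerates to writing the two scalars back to back; a
// sketch (hypothetical example, assuming an AArch64 target):
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let pair = int64x1x2_t(vdup_n_s64(-1), vdup_n_s64(5));
//         let mut out = [0i64; 2];
//         vst2_s64(out.as_mut_ptr(), pair);
//         assert_eq!(out, [-1, 5]);
//     }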
-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
-        fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v1i64")]
+        fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32);
     }
-vrshrn_n_s32_(a, const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) })
+    _vst2_s64(a as _, b.0, b.1, 8)
 }

-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rshrn.v4i16")]
-        fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-vrshrn_n_s32_(a, N)
-}
-
-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
-        fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-vrshrn_n_s64_(a, const { int64x2_t([-N as i64, -N as i64]) })
-}
-
-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_s64)
-#[inline]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
     extern "unadjusted" {
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rshrn.v2i32")]
-        fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2.v1i64.p0i8"
+        )]
+        fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8);
     }
-vrshrn_n_s64_(a, N)
+    _vst2_s64(b.0, b.1, a as _)
 }

-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    transmute(vrshrn_n_s16::<N>(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) {
+    vst2_s64(transmute(a), transmute(b))
 }

-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    transmute(vrshrn_n_s32::<N>(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) {
+    vst2_s8(transmute(a), transmute(b))
 }

-/// Rounding shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_n_u64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    transmute(vrshrn_n_s64::<N>(transmute(a)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) {
+    vst2q_s8(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s8)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_add(a, vrshr_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) {
+    vst2_s16(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s8)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_add(a, vrshrq_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) {
+    vst2q_s16(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_add(a, vrshr_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) {
+    vst2_s32(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s16)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_add(a, vrshrq_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) {
+    vst2q_s32(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_add(a, vrshr_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) {
+    vst2_s8(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s32)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_add(a, vrshrq_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) {
+    vst2q_s8(transmute(a), transmute(b))
 }

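// Unlike the `_lane_` forms, the full `vst2_*` stores interleave every lane of
// the two registers: out = [a0, b0, a1, b1, ...]. A sketch using the `vst2_u16`
// wrapper defined above (hypothetical example, assuming an AArch64 target):
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let a = vld1_u16([0u16, 1, 2, 3].as_ptr());
//         let b = vld1_u16([10u16, 11, 12, 13].as_ptr());
//         let mut out = [0u16; 8];
//         vst2_u16(out.as_mut_ptr(), uint16x4x2_t(a, b));
//         assert_eq!(out, [0, 10, 1, 11, 2, 12, 3, 13]);
//     }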
-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_s64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    simd_add(a, vrshr_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) {
+    vst2_s16(transmute(a), transmute(b))
 }

-/// Signed rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_s64)
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(srsra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    simd_add(a, vrshrq_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) {
+    vst2q_s16(transmute(a), transmute(b))
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u8)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_add(a, vrshr_n_u8::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v2f32")]
+        fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32);
+    }
+    _vst3_f32(a as _, b.0, b.1, b.2, 4)
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u8)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_add(a, vrshrq_n_u8::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4f32")]
+        fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32);
+    }
+    _vst3q_f32(a as _, b.0, b.1, b.2, 4)
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u16)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_add(a, vrshr_n_u16::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8i8")]
+        fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32);
+    }
+    _vst3_s8(a as _, b.0, b.1, b.2, 1)
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u16)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_add(a, vrshrq_n_u16::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v16i8")]
+        fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32);
+    }
+    _vst3q_s8(a as _, b.0, b.1, b.2, 1)
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u32)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_add(a, vrshr_n_u32::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4i16")]
+        fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32);
+    }
+    _vst3_s16(a as _, b.0, b.1, b.2, 2)
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsraq_n_u32)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_add(a, vrshrq_n_u32::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8i16")]
+        fn _vst3q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32);
+    }
+    _vst3q_s16(a as _, b.0, b.1, b.2, 2)
 }

-/// Unsigned rounding shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsra_n_u64)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    simd_add(a, vrshr_n_u64::<N>(b))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst3))]
+pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v2i32")]
+        fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32);
+    }
+    _vst3_s32(a as _, b.0, b.1, b.2, 4)
 }

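// The three-register `vst3_*` family interleaves triples: out = [a0, b0, c0, a1,
// b1, c1, ...]. A sketch (hypothetical example; shown against the AArch64 API,
// though the ARM v7 definitions above behave the same):
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let v = int32x2x3_t(vdup_n_s32(1), vdup_n_s32(2), vdup_n_s32(3));
//         let mut out = [0i32; 6];
//         vst3_s32(out.as_mut_ptr(), v);
//         assert_eq!(out, [1, 2, 3, 1, 2, 3]);
//     }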
#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - simd_add(a, vrshrq_n_u64::(b)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4i32")] + fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32); + } + _vst3q_s32(a as _, b.0, b.1, b.2, 4) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rsubhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rsubhn.v8i8")] - fn vrsubhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v2f32.p0i8" + )] + fn _vst3_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8); } -vrsubhn_s16_(a, b) + _vst3_f32(b.0, b.1, b.2, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rsubhn))] -#[cfg_attr(not(target_arch = "arm"), 
stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rsubhn.v4i16")] - fn vrsubhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4f32.p0i8" + )] + fn _vst3q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut i8); } -vrsubhn_s32_(a, b) + _vst3q_f32(b.0, b.1, b.2, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rsubhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.rsubhn.v2i32")] - fn vrsubhn_s64_(a: int64x2_t, b: int64x2_t) -> int32x2_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8i8.p0i8" + )] + fn _vst3_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); } -vrsubhn_s64_(a, b) + _vst3_s8(b.0, b.1, b.2, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rsubhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn 
vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { - transmute(vrsubhn_s16(transmute(a), transmute(b))) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v16i8.p0i8" + )] + fn _vst3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } + _vst3q_s8(b.0, b.1, b.2, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rsubhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { - transmute(vrsubhn_s32(transmute(a), transmute(b))) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v4i16.p0i8" + )] + fn _vst3_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8); + } + _vst3_s16(b.0, b.1, b.2, a as _) } -/// Rounding subtract returning high narrow -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(rsubhn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { - transmute(vrsubhn_s64(transmute(a), transmute(b))) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v8i16.p0i8" + )] + fn _vst3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8); + } + _vst3q_s16(b.0, b.1, b.2, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's 
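For context, the vst3 family above performs interleaving stores: lane i of each of the three source registers is written out contiguously before lane i + 1. A minimal usage sketch, assuming an aarch64 target with NEON available; the helper name and values are illustrative and not part of the generated file:

#[cfg(target_arch = "aarch64")]
unsafe fn demo_vst3_s16(dst: &mut [i16; 12]) {
    use core::arch::aarch64::*;
    // Lane i of source register j lands at dst[3 * i + j], so the buffer
    // becomes [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3].
    let triple = int16x4x3_t(vdup_n_s16(1), vdup_n_s16(2), vdup_n_s16(3));
    vst3_s16(dst.as_mut_ptr(), triple);
}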
-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s8)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(b, LANE as u32, a)
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3.v2i32.p0i8"
+        )]
+        fn _vst3_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8);
+    }
+    _vst3_s32(b.0, b.1, b.2, a as _)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s16)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(b, LANE as u32, a)
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3.v4i32.p0i8"
+        )]
+        fn _vst3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8);
+    }
+    _vst3q_s32(b.0, b.1, b.2, a as _)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s32)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
     static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(b, LANE as u32, a)
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v2f32")]
+        fn _vst3_lane_f32(
+            ptr: *mut i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_s64)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(b, LANE as u32, a)
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4f32")]
+        fn _vst3q_lane_f32(
+            ptr: *mut i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u8)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
     static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(b, LANE as u32, a)
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8i8")]
+        fn _vst3_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32);
+    }
+    _vst3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u16)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
     static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(b, LANE as u32, a)
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i16")]
+        fn _vst3_lane_s16(
+            ptr: *mut i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u32)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(b, LANE as u32, a)
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
+    static_assert_uimm_bits!(LANE, 3);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8i16")]
+        fn _vst3q_lane_s16(
+            ptr: *mut i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_u64)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(b, LANE as u32, a)
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v2i32")]
+        fn _vst3_lane_s32(
+            ptr: *mut i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p8)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_insert!(b, LANE as u32, a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vst3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i32")]
+        fn _vst3q_lane_s32(
+            ptr: *mut i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i32,
+            size: i32,
+        );
+    }
+    _vst3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p16)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_insert!(b, LANE as u32, a)
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v2f32.p0i8"
+        )]
+        fn _vst3_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8);
+    }
+    _vst3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }

-/// Insert vector element from another vector element
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_p64)
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(b, LANE as u32, a)
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
+    static_assert_uimm_bits!(LANE, 2);
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v4f32.p0i8"
+        )]
+        fn _vst3q_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8);
+    }
+    _vst3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
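The _lane variants above store a single element from each register rather than whole vectors; LANE is a const generic checked at compile time by static_assert_uimm_bits!. A sketch under the same assumptions as before (hypothetical helper, aarch64 with NEON):

#[cfg(target_arch = "aarch64")]
unsafe fn demo_vst3_lane_f32(dst: *mut f32, b: core::arch::aarch64::float32x2x3_t) {
    use core::arch::aarch64::vst3_lane_f32;
    // Writes exactly three floats: [b.0[1], b.1[1], b.2[1]]. A LANE outside
    // 0..=1 is rejected at compile time rather than at run time.
    vst3_lane_f32::<1>(dst, b);
}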
+#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s8(a: *mut i8, b: int8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i8.p0i8" + )] + fn _vst3_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s16(a: *mut i16, b: int16x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4i16.p0i8" + )] + fn _vst3_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { - static_assert_uimm_bits!(LANE, 2); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s16(a: *mut i16, b: int16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v8i16.p0i8" + )] + fn _vst3q_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s32(a: *mut i32, b: int32x2x3_t) { static_assert_uimm_bits!(LANE, 1); - simd_insert!(b, LANE as u32, a) + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v2i32.p0i8" + )] + fn _vst3_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); + } + _vst3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u8) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] +#[cfg(not(target_arch = "arm"))] #[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = 
"1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s32(a: *mut i32, b: int32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3lane.v4i32.p0i8" + )] + fn _vst3q_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); + } + _vst3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u8(a: *mut u8, b: uint8x8x3_t) { static_assert_uimm_bits!(LANE, 3); - simd_insert!(b, LANE as u32, a) + vst3_lane_s8::(transmute(a), transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u16(a: *mut u16, b: uint16x4x3_t) { static_assert_uimm_bits!(LANE, 2); - simd_insert!(b, LANE as u32, a) + vst3_lane_s16::(transmute(a), transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_u64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_u16(a: *mut u16, b: uint16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p8) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { - static_assert_uimm_bits!(LANE, 4); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = 
"arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_u32(a: *mut u32, b: uint32x2x3_t) { + static_assert_uimm_bits!(LANE, 1); + vst3_lane_s32::(transmute(a), transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { - static_assert_uimm_bits!(LANE, 3); - simd_insert!(b, LANE as u32, a) -} - -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_p64) -#[inline] -#[target_feature(enable = "neon,aes")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_u32(a: *mut u32, b: uint32x4x3_t) { + static_assert_uimm_bits!(LANE, 2); + vst3q_lane_s32::(transmute(a), transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", 
since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vset_lane_f32(a: f32, b: float32x2_t) -> float32x2_t { - static_assert_uimm_bits!(LANE, 1); - simd_insert!(b, LANE as u32, a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_p8(a: *mut p8, b: poly8x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3_lane_s8::(transmute(a), transmute(b)) } -/// Insert vector element from another vector element -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsetq_lane_f32(a: f32, b: float32x4_t) -> float32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_lane_p16(a: *mut p16, b: poly16x4x3_t) { static_assert_uimm_bits!(LANE, 2); - simd_insert!(b, LANE as u32, a) + vst3_lane_s16::(transmute(a), transmute(b)) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s8) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", 
target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v8i8")] - fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vshl_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_lane_p16(a: *mut p16, b: poly16x8x3_t) { + static_assert_uimm_bits!(LANE, 3); + vst3q_lane_s16::(transmute(a), transmute(b)) +} + +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s8) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v16i8")] - fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st3.v1i64.p0i8" + )] + fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8); } -vshlq_s8_(a, b) + _vst3_s64(b.0, b.1, b.2, a as _) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] +#[doc = "## Safety"] +#[doc 
= " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v4i16")] - fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v1i64")] + fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32); } -vshl_s16_(a, b) + _vst3_s64(a as _, b.0, b.1, b.2, 8) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v8i16")] - fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vshlq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(nop) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { + vst3_s64(transmute(a), transmute(b)) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v2i32")] - fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vshl_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v4i32")] - fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vshlq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v1i64")] - fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; - } -vshl_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) } -/// Signed Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_s64) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sshl.v2i64")] - fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; - } -vshlq_s64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u8) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", 
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v8i8")] - fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; - } -vshl_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { + vst3_s32(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u8) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v16i8")] - fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; - } -vshlq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { + vst3q_s32(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> 
uint16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v4i16")] - fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; - } -vshl_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { + vst3_s8(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u16) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v8i16")] - fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; - } -vshlq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { + vst3q_s8(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vshiftu.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v2i32")] - fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; - } -vshl_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { + vst3_s16(transmute(a), transmute(b)) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u32) +#[doc = "Store multiple 3-element structures from three registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st3) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { + vst3q_s16(transmute(a), transmute(b)) +} + +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v4i32")] - fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2f32")] + fn _vst4_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + size: i32, + ); } -vshlq_u32_(a, b) + _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4) } -/// Unsigned Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v4i32")]
-        fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2f32")]
+        fn _vst4_f32(
+            ptr: *mut i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            size: i32,
+        );
     }
-vshlq_u32_(a, b)
+    _vst4_f32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-/// Unsigned Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_u64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v1i64")]
-        fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4f32")]
+        fn _vst4q_f32(
+            ptr: *mut i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            size: i32,
+        );
     }
-vshl_u64_(a, b)
+    _vst4q_f32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-/// Unsigned Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_u64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushl))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
     extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
-        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.ushl.v2i64")]
-        fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8i8")]
+        fn _vst4_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32);
     }
-vshlq_u64_(a, b)
+    _vst4_s8(a as _, b.0, b.1, b.2, b.3, 1)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shl(a, vdup_n_s8(N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v16i8")]
+        fn _vst4q_s8(
+            ptr: *mut i8,
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            size: i32,
+        );
+    }
+    _vst4q_s8(a as _, b.0, b.1, b.2, b.3, 1)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s8)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shl(a, vdupq_n_s8(N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4i16")]
+        fn _vst4_s16(
+            ptr: *mut i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            size: i32,
+        );
+    }
+    _vst4_s16(a as _, b.0, b.1, b.2, b.3, 2)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    simd_shl(a, vdup_n_s16(N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8i16")]
+        fn _vst4q_s16(
+            ptr: *mut i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            size: i32,
+        );
+    }
+    _vst4q_s16(a as _, b.0, b.1, b.2, b.3, 2)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    simd_shl(a, vdupq_n_s16(N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2i32")]
+        fn _vst4_s32(
+            ptr: *mut i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            size: i32,
+        );
+    }
+    _vst4_s32(a as _, b.0, b.1, b.2, b.3, 4)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    simd_shl(a, vdup_n_s32(N as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vst4))]
+pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4i32")]
+        fn _vst4q_s32(
+            ptr: *mut i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            size: i32,
+        );
+    }
+    _vst4q_s32(a as _, b.0, b.1, b.2, b.3, 4)
 }
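// --- Illustrative sketch only; an editor's example, not part of the generated diff. ---
// On the arm side, each wrapper above binds straight to an LLVM builtin through
// link_name and the perma-unstable "unadjusted" ABI, passing the destination as
// *mut i8 plus a trailing element-alignment argument (1 for i8, 2 for i16, 4 for
// i32/f32) — hence the `_vst4_*(a as _, b.0, b.1, b.2, b.3, <align>)` bodies.
// Callers never see that; on a (nightly-only) Armv7 NEON build it is simply:
#[cfg(all(target_arch = "arm", target_feature = "neon"))]
unsafe fn store_quads(out: *mut i16, quads: core::arch::arm::int16x4x4_t) {
    // VST4 interleaves lane-wise: out = [a0,b0,c0,d0, a1,b1,c1,d1, ...] (16 i16s).
    core::arch::arm::vst4_s16(out, quads);
}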
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    simd_shl(a, vdupq_n_s32(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v2f32.p0i8"
+        )]
+        fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8);
+    }
+    _vst4_f32(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u8)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shl(a, vdup_n_u8(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v4f32.p0i8"
+        )]
+        fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8);
+    }
+    _vst4q_f32(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u8)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shl(a, vdupq_n_u8(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v8i8.p0i8"
+        )]
+        fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
+    }
+    _vst4_s8(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    simd_shl(a, vdup_n_u16(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v16i8.p0i8"
+        )]
+        fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
+    }
+    _vst4q_s8(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    simd_shl(a, vdupq_n_u16(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v4i16.p0i8"
+        )]
+        fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8);
+    }
+    _vst4_s16(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    simd_shl(a, vdup_n_u32(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v8i16.p0i8"
+        )]
+        fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8);
+    }
+    _vst4q_s16(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift left
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    simd_shl(a, vdupq_n_u32(N as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v2i32.p0i8"
+        )]
+        fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8);
+    }
+    _vst4_s32(b.0, b.1, b.2, b.3, a as _)
 }
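// --- Illustrative sketch only; an editor's example, not part of the generated diff. ---
// The aarch64 versions bind to llvm.aarch64.neon.st4.* instead, taking the pointer
// last and no alignment argument; the observable interleave is the same. A quick
// demonstration of the ST4 memory layout:
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe fn st4_layout_demo() -> [i32; 8] {
    use core::arch::aarch64::{int32x2x4_t, vdup_n_s32, vst4_s32};
    let regs = int32x2x4_t(
        vdup_n_s32(10), // a = [10, 10]
        vdup_n_s32(20), // b = [20, 20]
        vdup_n_s32(30), // c = [30, 30]
        vdup_n_s32(40), // d = [40, 40]
    );
    let mut out = [0i32; 8];
    vst4_s32(out.as_mut_ptr(), regs);
    out // [10, 20, 30, 40, 10, 20, 30, 40]: lane 0 of a..d, then lane 1
}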
+#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_n_s64(a: int64x1_t) -> int64x1_t { - static_assert_uimm_bits!(N, 6); - simd_shl(a, vdup_n_s64(N as _)) +#[cfg(not(target_arch = "arm"))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +#[cfg_attr(test, assert_instr(st4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4.v4i32.p0i8" + )] + fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8); + } + _vst4q_s32(b.0, b.1, b.2, b.3, a as _) } -/// Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_s64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert_uimm_bits!(N, 6); - simd_shl(a, vdupq_n_s64(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v2f32")] + fn _vst4_lane_f32( + ptr: *mut i8, + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i32, + size: i32, + ); + } + _vst4_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -/// Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_u64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = 
"neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_uimm_bits!(N, 6); - simd_shl(a, vdup_n_u64(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4f32")] + fn _vst4q_lane_f32( + ptr: *mut i8, + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i32, + size: i32, + ); + } + _vst4q_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -/// Shift left -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshlq_n_u64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shl, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshlq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_uimm_bits!(N, 6); - simd_shl(a, vdupq_n_u64(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i8")] + fn _vst4_lane_s8( + ptr: *mut i8, + a: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + n: i32, + size: i32, + ); + } + _vst4_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshll, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshll_n_s8(a: int8x8_t) -> int16x8_t { - static_assert!(N >= 0 && N <= 8); - simd_shl(simd_cast(a), 
vdupq_n_s16(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i16")] + fn _vst4_lane_s16( + ptr: *mut i8, + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i32, + size: i32, + ); + } + _vst4_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshll, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshll_n_s16(a: int16x4_t) -> int32x4_t { - static_assert!(N >= 0 && N <= 16); - simd_shl(simd_cast(a), vdupq_n_s32(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i16")] + fn _vst4q_lane_s16( + ptr: *mut i8, + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i32, + size: i32, + ); + } + _vst4q_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_s32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshll, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshll_n_s32(a: int32x2_t) -> int64x2_t { - static_assert!(N >= 0 && N <= 32); - simd_shl(simd_cast(a), vdupq_n_s64(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v2i32")] + fn _vst4_lane_s32( + ptr: *mut i8, + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i32, + size: i32, + ); + } + _vst4_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushll, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshll_n_u8(a: uint8x8_t) -> uint16x8_t { - static_assert!(N >= 0 && N <= 8); - simd_shl(simd_cast(a), vdupq_n_u16(N as _)) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i32")] + fn _vst4q_lane_s32( + ptr: *mut i8, + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i32, + size: i32, + ); + } + _vst4q_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushll, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { - static_assert!(N >= 0 && N <= 16); - simd_shl(simd_cast(a), vdupq_n_u32(N as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = 
"llvm.aarch64.neon.st4lane.v2f32.p0i8" + )] + fn _vst4_lane_f32( + a: float32x2_t, + b: float32x2_t, + c: float32x2_t, + d: float32x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Signed shift left long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_n_u32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushll, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { - static_assert!(N >= 0 && N <= 32); - simd_shl(simd_cast(a), vdupq_n_u64(N as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_f32(a: *mut f32, b: float32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4f32.p0i8" + )] + fn _vst4q_lane_f32( + a: float32x4_t, + b: float32x4_t, + c: float32x4_t, + d: float32x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_s8(a: int8x8_t) -> int8x8_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { 7 } else { N }; - simd_shr(a, vdup_n_s8(n as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8i8.p0i8" + )] + fn _vst4_lane_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); + } + _vst4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// 
Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { 7 } else { N }; - simd_shr(a, vdupq_n_s8(n as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4i16.p0i8" + )] + fn _vst4_lane_s16( + a: int16x4_t, + b: int16x4_t, + c: int16x4_t, + d: int16x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_s16(a: int16x4_t) -> int16x4_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { 15 } else { N }; - simd_shr(a, vdup_n_s16(n as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s16(a: *mut i16, b: int16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v8i16.p0i8" + )] + fn _vst4q_lane_s16( + a: int16x8_t, + b: int16x8_t, + c: int16x8_t, + d: int16x8_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = 
"[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { 15 } else { N }; - simd_shr(a, vdupq_n_s16(n as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s32(a: *mut i32, b: int32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v2i32.p0i8" + )] + fn _vst4_lane_s32( + a: int32x2_t, + b: int32x2_t, + c: int32x2_t, + d: int32x2_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_s32(a: int32x2_t) -> int32x2_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { 31 } else { N }; - simd_shr(a, vdup_n_s32(n as _)) +#[cfg(not(target_arch = "arm"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s32(a: *mut i32, b: int32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.st4lane.v4i32.p0i8" + )] + fn _vst4q_lane_s32( + a: int32x4_t, + b: int32x4_t, + c: int32x4_t, + d: int32x4_t, + n: i64, + ptr: *mut i8, + ); + } + _vst4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] 
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { 31 } else { N }; - simd_shr(a, vdupq_n_s32(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_u8(a: *mut u8, b: uint8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_s64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_s64(a: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { 63 } else { N }; - simd_shr(a, vdup_n_s64(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_u16(a: *mut u16, b: uint16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_s64) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), assert_instr(sshr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - let n: i32 = if N == 64 { 63 } else { N }; - simd_shr(a, vdupq_n_s64(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_lane_u16(a: *mut u16, b: uint16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { return vdup_n_u8(0); } else { N }; - simd_shr(a, vdup_n_u8(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_u32(a: *mut u32, b: uint32x2x4_t) { + static_assert_uimm_bits!(LANE, 1); + vst4_lane_s32::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u8) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - let n: i32 = if N == 8 { return vdupq_n_u8(0); } else { N }; - simd_shr(a, vdupq_n_u8(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4q_lane_s32::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { return vdup_n_u16(0); } else { N }; - simd_shr(a, vdup_n_u16(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4_lane_s8::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u16) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - let n: i32 = if N == 16 { return vdupq_n_u16(0); } else { N }; - 
simd_shr(a, vdupq_n_u16(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { + static_assert_uimm_bits!(LANE, 2); + vst4_lane_s16::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { return vdup_n_u32(0); } else { N }; - simd_shr(a, vdup_n_u32(n as _)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(st4, LANE = 0) +)] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { + static_assert_uimm_bits!(LANE, 3); + vst4q_lane_s16::(transmute(a), transmute(b)) } -/// Shift right -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u32) +#[doc = "Store multiple 4-element structures from four registers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - let n: i32 = if N == 32 { return vdupq_n_u32(0); } else { N }; - simd_shr(a, vdupq_n_u32(n as _)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
+    vst4_s64(transmute(a), transmute(b))
 }
-/// Shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshr_n_u64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    let n: i32 = if N == 64 { return vdup_n_u64(0); } else { N };
-    simd_shr(a, vdup_n_u64(n as _))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v1i64")]
+        fn _vst4_s64(
+            ptr: *mut i8,
+            a: int64x1_t,
+            b: int64x1_t,
+            c: int64x1_t,
+            d: int64x1_t,
+            size: i32,
+        );
+    }
+    _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8)
 }
-/// Shift right
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrq_n_u64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    let n: i32 = if N == 64 { return vdupq_n_u64(0); } else { N };
-    simd_shr(a, vdupq_n_u64(n as _))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st4.v1i64.p0i8"
+        )]
+        fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8);
+    }
+    _vst4_s64(b.0, b.1, b.2, b.3, a as _)
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_cast(simd_shr(a, vdupq_n_s16(N as _)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) {
+    vst4_s64(transmute(a), transmute(b))
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_cast(simd_shr(a, vdupq_n_s32(N as _)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) {
+    vst4_s8(transmute(a), transmute(b))
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_s64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_cast(simd_shr(a, vdupq_n_s64(N as _)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) {
+    vst4q_s8(transmute(a), transmute(b))
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_cast(simd_shr(a, vdupq_n_u16(N as _)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) {
+    vst4_s16(transmute(a), transmute(b))
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_cast(simd_shr(a, vdupq_n_u32(N as _)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) {
+    vst4q_s16(transmute(a), transmute(b))
 }
-/// Shift right narrow
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_n_u64)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(shrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_cast(simd_shr(a, vdupq_n_u64(N as _)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) {
+    vst4_s32(transmute(a), transmute(b))
 }
-/// Signed shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_add(a, vshr_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) {
+    vst4q_s32(transmute(a), transmute(b))
 }
-/// Signed shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s8)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_add(a, vshrq_n_s8::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) {
+    vst4_s8(transmute(a), transmute(b))
 }
-/// Signed shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_add(a, vshr_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) {
+    vst4q_s8(transmute(a), transmute(b))
 }
-/// Signed shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s16)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_add(a, vshrq_n_s16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) {
+    vst4_s16(transmute(a), transmute(b))
 }
-/// Signed shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s32)
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_add(a, vshr_n_s32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(st4)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) {
+    vst4q_s16(transmute(a), transmute(b))
 }
-/// Signed shift right and accumulate
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s32)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_add(a,
vshrq_n_s32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_sub(a, b) } -/// Signed shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s64) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N >= 1 && N <= 64); - simd_add(a, vshr_n_s64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(fsub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_sub(a, b) } -/// Signed shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_s64) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(ssra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N >= 1 && N <= 64); - simd_add(a, vshrq_n_s64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u8) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N >= 1 && N <= 8); - simd_add(a, vshr_n_u8::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u8) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N >= 1 && N <= 8); - simd_add(a, vshrq_n_u8::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u16) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] 
-#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N >= 1 && N <= 16); - simd_add(a, vshr_n_u16::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u16) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N >= 1 && N <= 16); - simd_add(a, vshrq_n_u16::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u32) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N >= 1 && N <= 32); - simd_add(a, vshr_n_u32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + 
stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u32) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N >= 1 && N <= 32); - simd_add(a, vshrq_n_u32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_u64) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N >= 1 && N <= 64); - simd_add(a, vshr_n_u64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_sub(a, b) } -/// Unsigned shift right and accumulate -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsraq_n_u64) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon 
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(usra, N = 2))] -#[rustc_legacy_const_generics(2)] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N >= 1 && N <= 64); - simd_add(a, vshrq_n_u64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_sub(a, b) } -/// Transpose elements -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { - let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); - let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); - transmute((a1, b1)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sub) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_sub(a, b) } -/// Transpose elements -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16) +#[doc = "Subtract"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u64)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { - let a1: int16x4_t = 
simd_shuffle!(a, b, [0, 4, 2, 6]);
-    let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_sub(a, b)
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t {
-    let a1: int8x16_t = simd_shuffle!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]);
-    let b1: int8x16_t = simd_shuffle!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_sub(a, b)
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t {
-    let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-    let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_sub(a, b)
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t {
-    let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-    let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_sub(a, b)
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
-    let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-    let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_sub(a, b)
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
-    let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-    let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_sub(a, b)
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t {
-    let a1: uint8x16_t = simd_shuffle!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]);
-    let b1: uint8x16_t = simd_shuffle!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let d: int8x8_t = vsubhn_s16(b, c);
+    simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t {
-    let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-    let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let d: int16x4_t = vsubhn_s32(b, c);
+    simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
-    let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-    let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let d: int32x2_t = vsubhn_s64(b, c);
+    simd_shuffle!(a, d, [0, 1, 2, 3])
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
-    let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-    let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let d: uint8x8_t = vsubhn_u16(b, c);
+    simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
-    let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-    let b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let d: uint16x4_t = vsubhn_u32(b, c);
+    simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
-    let a1: poly8x16_t = simd_shuffle!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]);
-    let b1: poly8x16_t = simd_shuffle!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let d: uint32x2_t = vsubhn_u64(b, c);
+    simd_shuffle!(a, d, [0, 1, 2, 3])
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
-    let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]);
-    let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
-    let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]);
-    let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    let c: i32x4 = i32x4::new(16, 16, 16, 16);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
-    let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
-    let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    let c: i64x2 = i64x2::new(32, 32);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
-    let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]);
-    let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(trn))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t {
-    let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]);
-    let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]);
-    transmute((a1, b1))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    let c: u32x4 = u32x4::new(16, 16, 16, 16);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)
+#[doc = "Subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_u64)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t {
-    let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-    let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(subhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    let c: u64x2 = u64x2::new(32, 32);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t {
-    let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-    let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
+    let c: int16x8_t = simd_cast(a);
+    let d: int16x8_t = simd_cast(b);
+    simd_sub(c, d)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t {
-    let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-    let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    let c: int32x4_t = simd_cast(a);
+    let d: int32x4_t = simd_cast(b);
+    simd_sub(c, d)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t {
-    let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-    let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    let c: int64x2_t = simd_cast(a);
+    let d: int64x2_t = simd_cast(b);
+    simd_sub(c, d)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t {
-    let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-    let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
+    let c: uint16x8_t = simd_cast(a);
+    let d: uint16x8_t = simd_cast(b);
+    simd_sub(c, d)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t {
-    let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-    let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    let c: uint32x4_t = simd_cast(a);
+    let d: uint32x4_t = simd_cast(b);
+    simd_sub(c, d)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
-    let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]);
-    let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usubl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    let c: uint64x2_t = simd_cast(a);
+    let d: uint64x2_t = simd_cast(b);
+    simd_sub(c, d)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s8)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t {
-    let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]);
-    let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ssubw)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t {
+    simd_sub(a, simd_cast(b))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s16)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { - let a0: int8x16_t = simd_shuffle!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); - let b0: int8x16_t = simd_shuffle!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + simd_sub(a, simd_cast(b)) } -/// Zip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16) +#[doc = "Signed Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { - let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(ssubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + simd_sub(a, simd_cast(b)) } -/// Zip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32) +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { - let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, 
b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + simd_sub(a, simd_cast(b)) } -/// Zip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8) +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { - let a0: uint8x16_t = simd_shuffle!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); - let b0: uint8x16_t = simd_shuffle!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + simd_sub(a, simd_cast(b)) } -/// Zip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16) +#[doc = "Unsigned Subtract Wide"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { - let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); - let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usubw) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vsubw_u32(a: uint64x2_t, b: 
+    simd_sub(a, simd_cast(b))
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t {
-    let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]);
-    let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]);
-    transmute((a0, b0))
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sudot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsudot_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int8x8_t,
+    c: uint8x8_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = transmute(c);
+    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vusdot_s32(a, transmute(c), b)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t {
-    let a0: poly8x16_t = simd_shuffle!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]);
-    let b0: poly8x16_t = simd_shuffle!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]);
-    transmute((a0, b0))
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sudot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vsudotq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int8x16_t,
+    c: uint8x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = transmute(c);
+    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vusdotq_s32(a, transmute(c), b)
 }
-/// Zip vectors
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
-    let a0: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]);
-    let b0: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]);
-    transmute((a0, b0))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t {
+    let a1: float32x2_t = simd_shuffle!(a, b, [0, 2]);
+    let b1: float32x2_t = simd_shuffle!(a, b, [1, 3]);
+    transmute((a1, b1))
 }
-/// Transpose elements
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)
+#[doc = "Transpose elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))]
-#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))]
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))]
-#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
-pub unsafe fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
-    let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]);
-    let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]);
-    transmute((a1, b1))
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(zip)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t {
+    let a1: int32x2_t = simd_shuffle!(a, b, [0, 2]);
+    let b1: int32x2_t = simd_shuffle!(a, b, [1, 3]);
+    transmute((a1, b1))
 }
-/// Zip vectors
vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { - let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); - let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a1: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { - let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a1: float32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { - let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a1: int8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { - let a0: int8x16_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); - let b0: int8x16_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a1: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: int8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { - let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a1: int16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { - let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a1: int16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = 
"stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { - let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a1: int32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { - let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a1: uint8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { - let a0: uint8x16_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); - let b0: uint8x16_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 
19, 21, 23, 25, 27, 29, 31]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a1: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: uint8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { - let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a1: uint16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { - let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
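// EDITOR'S NOTE (illustrative sketch, not part of the generated diff): the vtrn_*/vtrnq_*
// additions in this hunk transpose two vectors lane-pairwise, exactly as their
// simd_shuffle! index patterns spell out. A minimal usage sketch, assuming an aarch64
// target with NEON available:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let a = vld1_u8([0u8, 1, 2, 3, 4, 5, 6, 7].as_ptr());
//         let b = vld1_u8([10u8, 11, 12, 13, 14, 15, 16, 17].as_ptr());
//         let t: uint8x8x2_t = vtrn_u8(a, b);
//         // t.0 == [0, 10, 2, 12, 4, 14, 6, 16]
//         // t.1 == [1, 11, 3, 13, 5, 15, 7, 17]
//     }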
+ assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a1: uint16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { - let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a1: uint32x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { - let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) ->
poly8x8x2_t { + let a1: poly8x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { - let a0: poly8x16_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); - let b0: poly8x16_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a1: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] + ); + let b1: poly8x16_t = simd_shuffle!( + a, + b, + [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31] + ); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { - let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); - let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a1: poly16x4_t = simd_shuffle!(a, b, [0, 4, 2, 6]); + let
b1: poly16x4_t = simd_shuffle!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32) +#[doc = "Transpose elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrnq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { - let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(trn) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a1: poly16x8_t = simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { - let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + let c: int8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon
instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(zip))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { - let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); - let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + let c: int8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Unzip vectors -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uzp))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { - let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); - let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); - transmute((a0, b0)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + let c: int16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Unsigned Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u8) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabal))] 
-#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - let d: uint8x8_t = vabd_u8(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + let c: int16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Unsigned Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u16) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabal))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - let d: uint16x4_t = vabd_u16(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + let c: int32x2_t = simd_and(a, b); + let d: i32x2 = i32x2::new(0, 0); + simd_ne(c, transmute(d)) } -/// Unsigned Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_u32) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(uabal))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - let d: uint32x2_t = vabd_u32(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"),
assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + let c: int32x4_t = simd_and(a, b); + let d: i32x4 = i32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s8) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabal))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - let d: int8x8_t = vabd_s8(b, c); - let e: uint8x8_t = simd_cast(d); - simd_add(a, simd_cast(e)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + let c: poly8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s16) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabal))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { - let d: int16x4_t = vabd_s16(b, c); - let e: uint16x4_t = simd_cast(d); - simd_add(a, simd_cast(e)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)]
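// EDITOR'S NOTE (illustrative sketch, not part of the generated diff): the vtst/vtstq
// family ANDs its two inputs lane-wise and returns all-ones in each lane where the
// result is nonzero, hence the simd_and followed by simd_ne against a zero vector in
// the new bodies. A minimal sketch, assuming an aarch64 target with NEON:
//
//     use core::arch::aarch64::*;
//     unsafe {
//         let a = vdup_n_u8(0b0101);
//         let b = vdup_n_u8(0b0100);
//         let r = vtst_u8(a, b); // every lane == 0xFF, since 0b0101 & 0b0100 != 0
//     }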
+pub unsafe fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + let c: poly8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed Absolute difference and Accumulate Long -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s32) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sabal))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { - let d: int32x2_t = vabd_s32(b, c); - let e: uint32x2_t = simd_cast(d); - simd_add(a, simd_cast(e)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { + let c: poly16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8) +#[doc = "Signed compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v8i8")] - fn vqabs_s8_(a: int8x8_t) -> int8x8_t; - } -vqabs_s8_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { + let c: poly16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0,
0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8) +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v16i8")] - fn vqabsq_s8_(a: int8x16_t) -> int8x16_t; - } -vqabsq_s8_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c: uint8x8_t = simd_and(a, b); + let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16) +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v4i16")] - fn vqabs_s16_(a: int16x4_t) -> int16x4_t; - } -vqabs_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c: uint8x16_t = simd_and(a, b); + let d: u8x16 =
u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16) +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v8i16")] - fn vqabsq_s16_(a: int16x8_t) -> int16x8_t; - } -vqabsq_s16_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c: uint16x4_t = simd_and(a, b); + let d: u16x4 = u16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32) +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v2i32")] - fn vqabs_s32_(a: int32x2_t) -> int32x2_t; - } -vqabs_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c: uint16x8_t
= simd_and(a, b); + let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) } -/// Signed saturating Absolute value -/// -/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32) +#[doc = "Unsigned compare bitwise Test bits nonzero"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtst_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))] -#[cfg_attr(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), assert_instr(sqabs))] -#[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] -#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] -pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")] - #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.sqabs.v4i32")] - fn vqabsq_s32_(a: int32x4_t) -> int32x4_t; - } -vqabsq_s32_(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(cmtst) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c: uint32x2_t = simd_and(a, b); + let d: u32x2 = u32x2::new(0, 0); + simd_ne(c, transmute(d)) } -#[cfg(test)] -#[allow(overflowing_literals)] -mod test { - use super::*; - use crate::core_arch::simd::*; - use std::mem::transmute; - use stdarch_test::simd_test; - - #[simd_test(enable = "neon")] - unsafe fn test_vand_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x0F, 0x0F); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x00); - let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x0F, 0x0F); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x00); - let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x0F); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x0F, 0x0F); - let e: i64x2 = 
i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x00); - let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vand_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x0F); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vandq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x0F, 0x0F); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x00); - let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_u8() { - let a: u8x8 = 
u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(vorr_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vorr_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorr_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); - 
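// EDITOR'S NOTE: the removed tests in this hunk all follow one pattern — lanes are
// built with the crate-internal portable vector types (i8x8, u8x16, ...), transmuted
// into the opaque NEON types at the intrinsic call boundary, and the result is
// transmuted back so that assert_eq! can compare lane by lane.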
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - 
let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(veorq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veor_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i8x8 = i8x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: i8x8 = transmute(vabd_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: i8x16 = i8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); - let r: i8x16 = transmute(vabdq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(16, 15, 14, 13); - let e: i16x4 = i16x4::new(15, 13, 11, 9); - let r: i16x4 = transmute(vabd_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i16x8 = i16x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: i16x8 = transmute(vabdq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(16, 15); - let e: i32x2 = i32x2::new(15, 13); - let r: i32x2 = transmute(vabd_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(16, 15, 14, 13); - let e: i32x4 = i32x4::new(15, 13, 11, 9); - let r: i32x4 = transmute(vabdq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - 
let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u8x8 = u8x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: u8x8 = transmute(vabd_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: u8x16 = u8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); - let r: u8x16 = transmute(vabdq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(16, 15, 14, 13); - let e: u16x4 = u16x4::new(15, 13, 11, 9); - let r: u16x4 = transmute(vabd_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u16x8 = u16x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: u16x8 = transmute(vabdq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(16, 15); - let e: u32x2 = u32x2::new(15, 13); - let r: u32x2 = transmute(vabd_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(16, 15, 14, 13); - let e: u32x4 = u32x4::new(15, 13, 11, 9); - let r: u32x4 = transmute(vabdq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabd_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(9.0, 3.0); - let e: f32x2 = f32x2::new(8.0, 1.0); - let r: f32x2 = transmute(vabd_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 5.0, -4.0); - let b: f32x4 = f32x4::new(9.0, 3.0, 2.0, 8.0); - let e: f32x4 = f32x4::new(8.0, 1.0, 3.0, 12.0); - let r: f32x4 = transmute(vabdq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 4, 3, 2, 1); - let b: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: u16x8 = u16x8::new(9, 8, 7, 6, 6, 7, 8, 9); - let r: u16x8 = transmute(vabdl_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(10, 10, 10, 10); - let e: u32x4 = u32x4::new(9, 8, 7, 6); - let r: u32x4 = transmute(vabdl_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(10, 10); - let e: u64x2 = u64x2::new(9, 8); - let r: u64x2 = transmute(vabdl_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 4, 3, 2, 1); - let b: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: i16x8 = i16x8::new(9, 8, 7, 6, 6, 7, 8, 9); - let r: i16x8 = transmute(vabdl_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
unsafe fn test_vabdl_s16() { - let a: i16x4 = i16x4::new(1, 2, 11, 12); - let b: i16x4 = i16x4::new(10, 10, 10, 10); - let e: i32x4 = i32x4::new(9, 8, 1, 2); - let r: i32x4 = transmute(vabdl_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s32() { - let a: i32x2 = i32x2::new(1, 11); - let b: i32x2 = i32x2::new(10, 10); - let e: i64x2 = i64x2::new(9, 1); - let r: i64x2 = transmute(vabdl_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_u8() { - let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u8() { - let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); - let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); - let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_u16() { - let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); - let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u16() { - let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_u32() { - let a: u32x2 = u32x2::new(0, 0x01); - let b: u32x2 = u32x2::new(0, 0x01); - let e: 
u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x2 = u32x2::new(0, 0); - let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u32() { - let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); - let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_s8() { - let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s8() { - let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_s16() { - let a: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); - let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = 
transmute(vceqq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x01); - let b: i32x2 = i32x2::new(-2147483648, 0x01); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x2 = i32x2::new(-2147483648, -2147483648); - let b: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); - let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_p8() { - let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceqq_p8() { - let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vceq_f32() { - let a: f32x2 = f32x2::new(1.2, 3.4); - let b: f32x2 = f32x2::new(1.2, 3.4); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vceqq_f32() { - let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); - let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_s8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_s16() { - let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x00); - let b: i32x2 = i32x2::new(-2147483648, 0x00); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vtst_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let b: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vtstq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_p8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_p8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
unsafe fn test_vtst_p16() { - let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_p16() { - let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let b: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let b: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtst_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let b: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vtst_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let b: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vtstq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabs_f32() { - let a: f32x2 = f32x2::new(-0.1, -2.2); - let e: f32x2 = f32x2::new(0.1, 2.2); - let r: f32x2 = transmute(vabs_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabsq_f32() { - let a: f32x4 = f32x4::new(-0.1, -2.2, -3.3, -6.6); - let e: f32x4 = f32x4::new(0.1, 2.2, 3.3, 6.6); - let r: f32x4 = transmute(vabsq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF); - let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgt_f32() { - let a: f32x2 = f32x2::new(1.2, 2.3); - let b: f32x2 = f32x2::new(0.1, 1.2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_f32() { - let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclt_f32() { - let a: f32x2 = f32x2::new(0.1, 1.2); - let b: f32x2 = f32x2::new(1.2, 2.3); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcltq_f32() { - let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b))); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcle_f32() { - let a: f32x2 = f32x2::new(0.1, 1.2); - let b: f32x2 = f32x2::new(1.2, 2.3); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcleq_f32() { - let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); 
- let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgeq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(0, 1); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = 
"neon")] - unsafe fn test_vcgeq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcge_f32() { - let a: f32x2 = f32x2::new(1.2, 2.3); - let b: f32x2 = f32x2::new(0.1, 1.2); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcgeq_f32() { - let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); - let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcls_s8() { - let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7); - let r: i8x8 = transmute(vcls_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclsq_s8() { - let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F); - let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); - let r: i8x16 = transmute(vclsq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcls_s16() { - let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00); - let e: i16x4 = i16x4::new(0, 15, 15, 15); - let r: i16x4 = transmute(vcls_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclsq_s16() { - let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15); - let r: i16x8 = transmute(vclsq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcls_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: i32x2 = i32x2::new(0, 31); - let r: i32x2 = transmute(vcls_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclsq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00); - let e: i32x4 = i32x4::new(0, 31, 31, 31); - let r: i32x4 = transmute(vclsq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcls_u8() { - let a: u8x8 = u8x8::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(7, 7, 7, 7, 7, 7, 7, 7); - let r: i8x8 = transmute(vcls_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclsq_u8() { - let a: u8x16 = u8x16::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF); - let e: i8x16 = i8x16::new(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7); - let r: i8x16 = transmute(vclsq_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcls_u16() { - let a: u16x4 = u16x4::new(0, 0xFF_FF, 0x00, 0x00); - let e: i16x4 = i16x4::new(15, 15, 15, 15); - let r: i16x4 = transmute(vcls_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclsq_u16() { - let a: u16x8 = u16x8::new(0, 0xFF_FF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(15, 15, 15, 15, 15, 15, 15, 
15);
-        let r: i16x8 = transmute(vclsq_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcls_u32() {
-        let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
-        let e: i32x2 = i32x2::new(31, 31);
-        let r: i32x2 = transmute(vcls_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
+#[doc = "Unsigned compare bitwise Test bits nonzero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtstq_u32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(cmtst)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    let c: uint32x4_t = simd_and(a, b);
+    let d: u32x4 = u32x4::new(0, 0, 0, 0);
+    simd_ne(c, transmute(d))
+}
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclsq_u32() {
-        let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x00, 0x00);
-        let e: i32x4 = i32x4::new(31, 31, 31, 31);
-        let r: i32x4 = transmute(vclsq_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vusdot_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: uint8x8_t,
+    c: int8x8_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = transmute(c);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vusdot_s32(a, b, transmute(c))
+}
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
-        let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7);
-        let r: i8x8 = transmute(vclz_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vusdotq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: uint8x16_t,
+    c: int8x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = transmute(c);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vusdotq_s32(a, b, transmute(c))
+}
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F);
-        let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1);
-        let r: i8x16 = transmute(vclzq_s8(transmute(a)));
-        assert_eq!(r, e);
+#[doc = "Dot product vector form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usdot.v2i32.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v2i32.v8i8")]
+        fn _vusdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
     }
+    _vusdot_s32(a, b.as_signed(), c)
+}
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
-        let e: i16x4 = i16x4::new(0, 0, 16, 15);
-        let r: i16x4 = transmute(vclz_s16(transmute(a)));
-        assert_eq!(r, e);
+#[doc = "Dot product vector form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_s32)"]
+#[doc = "## Safety"]
+#[doc = " * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usdot.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usdot.v4i32.v16i8")]
+        fn _vusdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
     }
+    _vusdotq_s32(a, b.as_signed(), c)
+}
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
-        let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15);
-        let r: i16x8 = transmute(vclzq_s16(transmute(a)));
-        assert_eq!(r, e);
} +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vclz_s32() { - let a: i32x2 = i32x2::new(-2147483648, -1); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vclz_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclzq_s32() { - let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); - let e: i32x4 = i32x4::new(0, 0, 32, 31); - let r: i32x4 = transmute(vclzq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclz_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); - let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7); - let r: u8x8 = transmute(vclz_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclzq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF); - let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); - let r: u8x16 = transmute(vclzq_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclz_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01); - let e: u16x4 = u16x4::new(16, 16, 15, 15); - let r: u16x4 = transmute(vclz_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclzq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); - let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15); - let r: u16x8 = transmute(vclzq_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclz_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(32, 32); - let r: u32x2 = transmute(vclz_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vclzq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01); - let e: u32x4 = u32x4::new(32, 32, 31, 31); - let r: u32x4 = transmute(vclzq_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcagt_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(!0, 0); - let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcagtq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(!0, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vcage_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(!0, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcageq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(!0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcalt_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaltq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcale_f32() { - let a: f32x2 = f32x2::new(-1.2, 0.0); - let b: f32x2 = f32x2::new(-1.1, 0.0); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcaleq_f32() { - let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); - let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s8() { - let a: u64 = 1; - let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vcreate_s8(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s16() { - let a: u64 = 1; - let e: i16x4 = i16x4::new(1, 0, 0, 0); - let r: i16x4 = transmute(vcreate_s16(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s32() { - let a: u64 = 1; - let e: i32x2 = i32x2::new(1, 0); - let r: i32x2 = transmute(vcreate_s32(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_s64() { - let a: u64 = 1; - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vcreate_s64(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u8() { - let a: u64 = 1; - let e: u8x8 = u8x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vcreate_u8(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u16() { - let a: u64 = 1; - let e: u16x4 = u16x4::new(1, 0, 0, 0); - let r: u16x4 = transmute(vcreate_u16(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u32() { - let a: u64 = 1; - let e: u32x2 = u32x2::new(1, 0); - let r: u32x2 = transmute(vcreate_u32(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_u64() { - let a: u64 = 1; - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vcreate_u64(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_p8() { - let a: u64 = 1; - let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vcreate_p8(a)); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_p16() { - let a: u64 = 1; - let e: i16x4 = i16x4::new(1, 0, 0, 0); - let r: i16x4 = transmute(vcreate_p16(a)); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon,aes")] - unsafe fn test_vcreate_p64() { - let a: u64 = 1; - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vcreate_p64(a)); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a0: float32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcreate_f32() { - let a: u64 = 0; - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vcreate_f32(a)); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a0: int8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f32_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: f32x2 = f32x2::new(1., 2.); - let r: f32x2 = transmute(vcvt_f32_s32(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f32_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(1., 2., 3., 4.); - let r: f32x4 = transmute(vcvtq_f32_s32(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a0: int16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_f32_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: f32x2 = f32x2::new(1., 2.); - let r: f32x2 = transmute(vcvt_f32_u32(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] 
+#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a0: int16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_f32_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(1., 2., 3., 4.); - let r: f32x4 = transmute(vcvtq_f32_u32(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a0: int32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f32_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: f32x2 = f32x2::new(0.25, 0.5); - let r: f32x2 = transmute(vcvt_n_f32_s32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f32_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let r: f32x4 = transmute(vcvtq_n_f32_s32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + 
assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_f32_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: f32x2 = f32x2::new(0.25, 0.5); - let r: f32x2 = transmute(vcvt_n_f32_u32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_f32_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let r: f32x4 = transmute(vcvtq_n_f32_u32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_s32_f32() { - let a: f32x2 = f32x2::new(0.25, 0.5); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vcvt_n_s32_f32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = 
"arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_s32_f32() { - let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let e: i32x4 = i32x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_n_s32_f32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_n_u32_f32() { - let a: f32x2 = f32x2::new(0.25, 0.5); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvt_n_u32_f32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_n_u32_f32() { - let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_n_u32_f32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_s32_f32() { - let a: f32x2 = f32x2::new(-1.1, 2.1); - let e: i32x2 = i32x2::new(-1, 2); - let r: i32x2 = transmute(vcvt_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s32_f32() { - let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); - let e: i32x4 = i32x4::new(-1, 2, -2, 3); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvt_u32_f32() { - let a: f32x2 = f32x2::new(1.1, 2.1); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vcvt_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u32_f32() { - let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); - let e: u32x4 = u32x4::new(1, 2, 2, 3); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vdup_lane_s32::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 4); - let e: i32x4 = 
i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 4); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s32() { - let a: i32x2 = i32x2::new(1, 1); - let e: i32x4 = i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 4); - let e: u16x4 = u16x4::new(1, 1, 1, 1); - let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u32() { - let a: u32x2 = u32x2::new(1, 1); - let e: u32x2 = u32x2::new(1, 1); - let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 4); - let e: u32x4 = u32x4::new(1, 1, 1, 1); - let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u16() { - 
let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u16x4 = u16x4::new(1, 1, 1, 1); - let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 4); - let e: u32x2 = u32x2::new(1, 1); - let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 4); - let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: u16x8 = transmute(vdupq_lane_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u32() { - let a: u32x2 = u32x2::new(1, 1); - let e: u32x4 = u32x4::new(1, 1, 1, 1); - let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); - let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); - let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 4); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a))); - assert_eq!(r, e); - 
} - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s64() { - let a: i64x1 = i64x1::new(1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u64() { - let a: u64x2 = u64x2::new(1, 1); - let e: u64x2 = u64x2::new(1, 1); - let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u64() { - let a: u64x1 = u64x1::new(1); - let e: u64x2 = u64x2::new(1, 1); - let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(1., 1.); - let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32x4 = f32x4::new(1., 1., 1., 1.); - let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32x2 = f32x2::new(1., 1.); - let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32x4 = f32x4::new(1., 1., 1., 1.); - let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u64() { - let a: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let r: i8x8 = transmute(vext_s8::<7>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r: i8x16 = transmute(vextq_s8::<15>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(1, 2, 2, 2); - let r: i16x4 = transmute(vext_s16::<3>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vextq_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let r: i16x8 = transmute(vextq_s16::<7>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(1, 2, 2, 2); - let r: i32x4 = transmute(vextq_s32::<3>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let r: u8x8 = transmute(vext_u8::<7>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r: u8x16 = transmute(vextq_u8::<15>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 1); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(1, 2, 2, 2); - let r: u16x4 = transmute(vext_u16::<3>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let r: u16x8 = transmute(vextq_u16::<7>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_u32() { - let a: u32x2 = u32x2::new(1, 1); - let b: u32x2 = u32x2::new(2, 2); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 1); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(1, 2, 2, 2); - let r: u32x4 = transmute(vextq_u32::<3>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_p8() { - let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let r: i8x8 = transmute(vext_p8::<7>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_p8() { - let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let r: i8x16 = transmute(vextq_p8::<15>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_p16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(1, 2, 2, 
2); - let r: i16x4 = transmute(vext_p16::<3>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_p16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(1, 2, 2, 2, 2, 2, 2, 2); - let r: i16x8 = transmute(vextq_p16::<7>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_s64() { - let a: i64x2 = i64x2::new(1, 1); - let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_u64() { - let a: u64x2 = u64x2::new(1, 1); - let b: u64x2 = u64x2::new(2, 2); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vext_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let b: f32x2 = f32x2::new(2., 2.); - let e: f32x2 = f32x2::new(1., 2.); - let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 1.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let e: f32x4 = f32x4::new(1., 2., 2., 2.); - let r: f32x4 = transmute(vextq_f32::<3>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 
2); - let c: i32x4 = i32x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_f32() { - let a: f32x2 = f32x2::new(0., 1.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(3., 3.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(3., 3., 3., 3.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16 = 3; - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 
2); - let c: i16 = 3; - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32 = 3; - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32 = 3; - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16 = 3; - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16 = 3; - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32 = 3; - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32 = 3; - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_n_f32() { - let a: f32x2 = f32x2::new(0., 1.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32 = 3.; - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_n_f32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_n_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32 = 3.; - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_n_f32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x8 = 
i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: 
u32x2 = u32x2::new(0, 3); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_lane_f32() { - let a: f32x2 = f32x2::new(0., 1.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(0., 3.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmla_laneq_f32() { - let a: f32x2 = f32x2::new(0., 1.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_lane_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x2 = f32x2::new(0., 3.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_laneq_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s8() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s32() { - let a: i64x2 = i64x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = 
transmute(vmlal_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u8() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u32() { - let a: u64x2 = u64x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16 = 3; - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_s32() { - let a: i64x2 = i64x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32 = 3; - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16 = 3; - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_n_u32() { - let a: u64x2 = u64x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32 = 3; - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_s32() { - let a: i64x2 = i64x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_s32() { - let a: i64x2 = i64x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i64x2 = i64x2::new(6, 7); - let r: 
i64x2 = transmute(vmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_lane_u32() { - let a: u64x2 = u64x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlal_laneq_u32() { - let a: u64x2 = u64x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_s8() { - let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_s8() { - let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(3, 3, 3, 
3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_u8() { - let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_u8() { - let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(3., 3.); - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(3., 3., 3., 3.); - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16 = 3; - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16 = 3; - let e: i16x8 
= i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32 = 3; - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32 = 3; - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16 = 3; - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16 = 3; - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32 = 3; - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32 = 3; - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_n_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32 = 3.; - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_n_f32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_n_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32 = 3.; - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_n_f32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_s16() { - let a: i16x4 = i16x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: 
i16x8 = transmute(vmlsq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_s16() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_s32() { - let a: i32x2 = i32x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_s32() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_u16() { - let a: u16x4 = u16x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_u16() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u32x2 = 
u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_u32() { - let a: u32x2 = u32x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_u32() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_lane_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(0., 3.); - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmls_laneq_f32() { - let a: f32x2 = f32x2::new(6., 7.); - let b: f32x2 = f32x2::new(2., 2.); - let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_lane_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x2 = f32x2::new(0., 3.); - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_laneq_f32() { - let a: f32x4 = f32x4::new(6., 7., 8., 9.); - let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(0., 3., 0., 0.); - let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_s8() { - let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsl_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_s32(transmute(a), transmute(b), transmute(c))); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_u8() { - let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsl_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16 = 3; - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32 = 3; - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16 = 3; - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_n_u16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_n_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32 = 3; - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_n_u32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(0, 3, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_s16() { - let a: i32x4 = i32x4::new(6, 7, 8, 9); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsl_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(0, 3); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_s32() { - let a: i64x2 = i64x2::new(6, 7); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x4 = i32x4::new(0, 3, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vmlsl_laneq_s32::<1>(transmute(a), 
transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(0, 3, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_u16() { - let a: u32x4 = u32x4::new(6, 7, 8, 9); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsl_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_lane_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(0, 3); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmlsl_laneq_u32() { - let a: u64x2 = u64x2::new(6, 7); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x4 = u32x4::new(0, 3, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vmlsl_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vneg_s8() { - let a: i8x8 = i8x8::new(0, 1, -1, 2, -2, 3, -3, 4); - let e: i8x8 = i8x8::new(0, -1, 1, -2, 2, -3, 3, -4); - let r: i8x8 = transmute(vneg_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s8() { - let a: i8x16 = i8x16::new(0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8); - let e: i8x16 = i8x16::new(0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8); - let r: i8x16 = transmute(vnegq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vneg_s16() { - let a: i16x4 = i16x4::new(0, 1, -1, 2); - let e: i16x4 = i16x4::new(0, -1, 1, -2); - let r: i16x4 = transmute(vneg_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s16() { - let a: i16x8 = i16x8::new(0, 1, -1, 2, -2, 3, -3, 4); - let e: i16x8 = i16x8::new(0, -1, 1, -2, 2, -3, 3, -4); - let r: i16x8 = transmute(vnegq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vneg_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: i32x2 = i32x2::new(0, -1); - let r: i32x2 = transmute(vneg_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vnegq_s32() { - let a: i32x4 = i32x4::new(0, 1, -1, 2); - let e: i32x4 = i32x4::new(0, -1, 1, -2); - let r: i32x4 = transmute(vnegq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vneg_f32() { - let a: f32x2 = f32x2::new(0., 1.); - let e: f32x2 = f32x2::new(0., -1.); - let r: f32x2 = transmute(vneg_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vnegq_f32() { - let a: f32x4 = f32x4::new(0., 1., -1., 2.); - let e: f32x4 = f32x4::new(0., -1., 1., -2.); - let r: f32x4 = transmute(vnegq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqneg_s8() { - let a: i8x8 = i8x8::new(-128, 0, 1, -1, 2, -2, 3, -3); - let e: i8x8 = i8x8::new(0x7F, 0, -1, 1, -2, 2, -3, 3); - let r: i8x8 = 
transmute(vqneg_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s8() { - let a: i8x16 = i8x16::new(-128, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7); - let e: i8x16 = i8x16::new(0x7F, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); - let r: i8x16 = transmute(vqnegq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqneg_s16() { - let a: i16x4 = i16x4::new(-32768, 0, 1, -1); - let e: i16x4 = i16x4::new(0x7F_FF, 0, -1, 1); - let r: i16x4 = transmute(vqneg_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s16() { - let a: i16x8 = i16x8::new(-32768, 0, 1, -1, 2, -2, 3, -3); - let e: i16x8 = i16x8::new(0x7F_FF, 0, -1, 1, -2, 2, -3, 3); - let r: i16x8 = transmute(vqnegq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqneg_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0); - let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0); - let r: i32x2 = transmute(vqneg_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqnegq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0, 1, -1); - let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, -1, 1); - let r: i32x4 = transmute(vqnegq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); - let r: u8x16 = transmute(vqsubq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(41, 40, 39, 38); - let r: u16x4 = transmute(vqsub_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: u16x8 = transmute(vqsubq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(41, 40); - let r: u32x2 = transmute(vqsub_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(41, 40, 39, 38); - let r: u32x4 = transmute(vqsubq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_u64() { - let a: u64x1 = u64x1::new(42); - let b: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(41); - let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vqsubq_u64() { - let a: u64x2 = u64x2::new(42, 42); - let b: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(41, 40); - let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: i8x8 = transmute(vqsub_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); - let r: i8x16 = transmute(vqsubq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(41, 40, 39, 38); - let r: i16x4 = transmute(vqsub_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(41, 40, 39, 38, 37, 36, 35, 34); - let r: i16x8 = transmute(vqsubq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(41, 40); - let r: i32x2 = transmute(vqsub_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(41, 40, 39, 38); - let r: i32x4 = transmute(vqsubq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsub_s64() { - let a: i64x1 = i64x1::new(42); - let b: i64x1 = i64x1::new(1); - let e: i64x1 = i64x1::new(41); - let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubq_s64() { - let a: i64x2 = i64x2::new(42, 42); - let b: i64x2 = i64x2::new(1, 2); - let e: i64x2 = i64x2::new(41, 40); - let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhadd_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); - let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhadd_u16() { - let a: u16x4 = u16x4::new(42, 42, 
42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(21, 22, 22, 23); - let r: u16x4 = transmute(vhadd_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhadd_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(21, 22); - let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(21, 22, 22, 23); - let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhadd_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: i8x8 = transmute(vhadd_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); - let r: i8x16 = transmute(vhaddq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhadd_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(21, 22, 22, 23); - let r: i16x4 = transmute(vhadd_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(21, 22, 22, 23, 23, 24, 24, 25); - let r: i16x8 = transmute(vhaddq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhadd_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(21, 22); - let r: i32x2 = transmute(vhadd_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhaddq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(21, 22, 22, 23); - let r: i32x4 = transmute(vhaddq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: u8x8 = transmute(vrhadd_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16); - let e: u8x16 = u8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); - let r: u8x16 = transmute(vrhaddq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(22, 22, 23, 23); - let r: u16x4 = transmute(vrhadd_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: u16x8 = transmute(vrhaddq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(22, 22); - let r: u32x2 = transmute(vrhadd_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(22, 22, 23, 23); - let r: u32x4 = transmute(vrhaddq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: i8x8 = transmute(vrhadd_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); - let r: i8x16 = transmute(vrhaddq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(22, 22, 23, 23); - let r: i16x4 = transmute(vrhadd_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(22, 22, 23, 23, 24, 24, 25, 25); - let r: i16x8 = transmute(vrhaddq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhadd_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(22, 22); - let r: i32x2 = transmute(vrhadd_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrhaddq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(22, 22, 23, 23); - let r: i32x4 = transmute(vrhaddq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,fp-armv8,v8"))] - #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))] - #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))] - unsafe fn test_vrndn_f32() { - let a:
f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 0.0); - let r: f32x2 = transmute(vrndn_f32(transmute(a))); - assert_eq!(r, e); - } - - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,fp-armv8,v8"))] - #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))] - #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))] - unsafe fn test_vrndnq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); - let r: f32x4 = transmute(vrndnq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u8() { - let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u8() { - let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); - let r: u8x16 = transmute(vqaddq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u16() { - let a: u16x4 = u16x4::new(42, 42, 42, 42); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(43, 44, 45, 46); - let r: u16x4 = transmute(vqadd_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u16() { - let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: u16x8 = transmute(vqaddq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u32() { - let a: u32x2 = u32x2::new(42, 42); - let b: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(43, 44); - let r: u32x2 = transmute(vqadd_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u32() { - let a: u32x4 = u32x4::new(42, 42, 42, 42); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(43, 44, 45, 46); - let r: u32x4 = transmute(vqaddq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_u64() { - let a: u64x1 = u64x1::new(42); - let b: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(43); - let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_u64() { - let a: u64x2 = u64x2::new(42, 42); - let b: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(43, 44); - let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s8() { - let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: i8x8 = transmute(vqadd_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s8() { - let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16); - let e: i8x16 = i8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); - let r: i8x16 = transmute(vqaddq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s16() { - let a: i16x4 = i16x4::new(42, 42, 42, 42); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(43, 44, 45, 46); - let r: i16x4 = transmute(vqadd_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s16() { - let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(43, 44, 45, 46, 47, 48, 49, 50); - let r: i16x8 = transmute(vqaddq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s32() { - let a: i32x2 = i32x2::new(42, 42); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(43, 44); - let r: i32x2 = transmute(vqadd_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s32() { - let a: i32x4 = i32x4::new(42, 42, 42, 42); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(43, 44, 45, 46); - let r: i32x4 = transmute(vqaddq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqadd_s64() { - let a: i64x1 = i64x1::new(42); - let b: i64x1 = i64x1::new(1); - let e: i64x1 = i64x1::new(43); - let r: i64x1 = transmute(vqadd_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddq_s64() { - let a: i64x2 = i64x2::new(42, 42); - let b: i64x2 = i64x2::new(1, 2); - let e: i64x2 = i64x2::new(43, 44); - let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s8_x2() { - let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i8x8; 2] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [i8x8; 2] = transmute(vld1_s8_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s16_x2() { - let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [i16x4; 2] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8)]; - let r: [i16x4; 2] = transmute(vld1_s16_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s32_x2() { - let a: [i32; 5] = [0, 1, 2, 3, 4]; - let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(3, 4)]; - let r: [i32x2; 2] = transmute(vld1_s32_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s64_x2() { - let a: [i64; 3] = [0, 1, 2]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; - let r: [i64x1; 2] = transmute(vld1_s64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s8_x2() { - let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i8x16; 2] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i8x16; 2] = transmute(vld1q_s8_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s16_x2() { - let a: [i16; 17] = [0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i16x8; 2] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [i16x8; 2] = transmute(vld1q_s16_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s32_x2() { - let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [i32x4; 2] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8)]; - let r: [i32x4; 2] = transmute(vld1q_s32_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s64_x2() { - let a: [i64; 5] = [0, 1, 2, 3, 4]; - let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(3, 4)]; - let r: [i64x2; 2] = transmute(vld1q_s64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s8_x3() { - let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; - let e: [i8x8; 3] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; - let r: [i8x8; 3] = transmute(vld1_s8_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s16_x3() { - let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; - let e: [i16x4; 3] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12)]; - let r: [i16x4; 3] = transmute(vld1_s16_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s32_x3() { - let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6]; - let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(3, 4), i32x2::new(5, 6)]; - let r: [i32x2; 3] = transmute(vld1_s32_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s64_x3() { - let a: [i64; 4] = [0, 1, 2, 3]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(3)]; - let r: [i64x1; 3] = transmute(vld1_s64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s8_x3() { - let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [i8x16; 3] = transmute(vld1q_s8_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s16_x3() { - let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; - let e: [i16x8; 3] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; - let r: [i16x8; 3] = transmute(vld1q_s16_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s32_x3() { - let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; - let e: [i32x4; 3] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12)]; - let r: [i32x4; 3] = transmute(vld1q_s32_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s64_x3() { - let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6)]; - let r: [i64x2; 
3] = transmute(vld1q_s64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s8_x4() { - let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i8x8; 4] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24), i8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i8x8; 4] = transmute(vld1_s8_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s16_x4() { - let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i16x4; 4] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12), i16x4::new(13, 14, 15, 16)]; - let r: [i16x4; 4] = transmute(vld1_s16_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s32_x4() { - let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(3, 4), i32x2::new(5, 6), i32x2::new(7, 8)]; - let r: [i32x2; 4] = transmute(vld1_s32_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_s64_x4() { - let a: [i64; 5] = [0, 1, 2, 3, 4]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(3), i64x1::new(4)]; - let r: [i64x1; 4] = transmute(vld1_s64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s8_x4() { - let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i8x16; 4] = transmute(vld1q_s8_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s16_x4() { - let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i16x8; 4] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24), i16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i16x8; 4] = transmute(vld1q_s16_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s32_x4() { - let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i32x4; 4] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12), i32x4::new(13, 14, 15, 16)]; - let r: [i32x4; 4] = transmute(vld1q_s32_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_s64_x4() { - let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6), i64x2::new(7, 8)]; - let r: [i64x2; 4] = transmute(vld1q_s64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u8_x2() { - let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [u8x8; 2] = [u8x8::new(1, 
2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [u8x8; 2] = transmute(vld1_u8_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u16_x2() { - let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [u16x4; 2] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8)]; - let r: [u16x4; 2] = transmute(vld1_u16_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u32_x2() { - let a: [u32; 5] = [0, 1, 2, 3, 4]; - let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(3, 4)]; - let r: [u32x2; 2] = transmute(vld1_u32_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u64_x2() { - let a: [u64; 3] = [0, 1, 2]; - let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)]; - let r: [u64x1; 2] = transmute(vld1_u64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u8_x2() { - let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [u8x16; 2] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [u8x16; 2] = transmute(vld1q_u8_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u16_x2() { - let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [u16x8; 2] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [u16x8; 2] = transmute(vld1q_u16_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u32_x2() { - let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [u32x4; 2] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8)]; - let r: [u32x4; 2] = transmute(vld1q_u32_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u64_x2() { - let a: [u64; 5] = [0, 1, 2, 3, 4]; - let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(3, 4)]; - let r: [u64x2; 2] = transmute(vld1q_u64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u8_x3() { - let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; - let e: [u8x8; 3] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16), u8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; - let r: [u8x8; 3] = transmute(vld1_u8_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u16_x3() { - let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; - let e: [u16x4; 3] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8), u16x4::new(9, 10, 11, 12)]; - let r: [u16x4; 3] = transmute(vld1_u16_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u32_x3() { - let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6]; - let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(3, 4), u32x2::new(5, 6)]; - let r: [u32x2; 3] = transmute(vld1_u32_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u64_x3() { - let a: [u64; 4] = [0, 1, 2, 3]; - let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(3)]; - let r: [u64x1; 3] = transmute(vld1_u64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vld1q_u8_x3() { - let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [u8x16; 3] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [u8x16; 3] = transmute(vld1q_u8_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u16_x3() { - let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; - let e: [u16x8; 3] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16), u16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; - let r: [u16x8; 3] = transmute(vld1q_u16_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u32_x3() { - let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; - let e: [u32x4; 3] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8), u32x4::new(9, 10, 11, 12)]; - let r: [u32x4; 3] = transmute(vld1q_u32_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u64_x3() { - let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; - let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(3, 4), u64x2::new(5, 6)]; - let r: [u64x2; 3] = transmute(vld1q_u64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u8_x4() { - let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [u8x8; 4] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16), u8x8::new(17, 18, 19, 20, 21, 22, 23, 24), u8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [u8x8; 4] = transmute(vld1_u8_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u16_x4() { - let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [u16x4; 4] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8), u16x4::new(9, 10, 11, 12), u16x4::new(13, 14, 15, 16)]; - let r: [u16x4; 4] = transmute(vld1_u16_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u32_x4() { - let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(3, 4), u32x2::new(5, 6), u32x2::new(7, 8)]; - let r: [u32x2; 4] = transmute(vld1_u32_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_u64_x4() { - let a: [u64; 5] = [0, 1, 2, 3, 4]; - let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(3), u64x1::new(4)]; - let r: [u64x1; 4] = transmute(vld1_u64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u8_x4() { - let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [u8x16; 4] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [u8x16; 4] = transmute(vld1q_u8_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u16_x4() { - let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [u16x8; 4] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16), u16x8::new(17, 18, 19, 20, 21, 22, 23, 24), u16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [u16x8; 4] = transmute(vld1q_u16_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u32_x4() { - let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [u32x4; 4] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8), u32x4::new(9, 10, 11, 12), u32x4::new(13, 14, 15, 16)]; - let r: [u32x4; 4] = transmute(vld1q_u32_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_u64_x4() { - let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(3, 4), u64x2::new(5, 6), u64x2::new(7, 8)]; - let r: [u64x2; 4] = transmute(vld1q_u64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p8_x2() { - let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i8x8; 2] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [i8x8; 2] = transmute(vld1_p8_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p8_x3() { - let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; - let e: [i8x8; 3] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; - let r: [i8x8; 3] = transmute(vld1_p8_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p8_x4() { - let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i8x8; 4] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24), i8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i8x8; 4] = transmute(vld1_p8_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p8_x2() { - let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i8x16; 2] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i8x16; 2] = transmute(vld1q_p8_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p8_x3() { - let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [i8x16; 3] = transmute(vld1q_p8_x3(a[1..].as_ptr())); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p8_x4() { - let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i8x16; 4] = transmute(vld1q_p8_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p16_x2() { - let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [i16x4; 2] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8)]; - let r: [i16x4; 2] = transmute(vld1_p16_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p16_x3() { - let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; - let e: [i16x4; 3] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12)]; - let r: [i16x4; 3] = transmute(vld1_p16_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p16_x4() { - let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i16x4; 4] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12), i16x4::new(13, 14, 15, 16)]; - let r: [i16x4; 4] = transmute(vld1_p16_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p16_x2() { - let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let e: [i16x8; 2] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; - let r: [i16x8; 2] = transmute(vld1q_p16_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p16_x3() { - let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; - let e: [i16x8; 3] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; - let r: [i16x8; 3] = transmute(vld1q_p16_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p16_x4() { - let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; - let e: [i16x8; 4] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24), i16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; - let r: [i16x8; 4] = transmute(vld1q_p16_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p64_x2() { - let a: [u64; 3] = [0, 1, 2]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; - let r: [i64x1; 2] = transmute(vld1_p64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p64_x3() { - let a: [u64; 4] = [0, 1, 2, 3]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(3)]; - let r: [i64x1; 3] = transmute(vld1_p64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_p64_x4() { - let a: [u64; 5] = [0, 1, 2, 3, 4]; - 
let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(3), i64x1::new(4)]; - let r: [i64x1; 4] = transmute(vld1_p64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p64_x2() { - let a: [u64; 5] = [0, 1, 2, 3, 4]; - let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(3, 4)]; - let r: [i64x2; 2] = transmute(vld1q_p64_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p64_x3() { - let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; - let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6)]; - let r: [i64x2; 3] = transmute(vld1q_p64_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_p64_x4() { - let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6), i64x2::new(7, 8)]; - let r: [i64x2; 4] = transmute(vld1q_p64_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_f32_x2() { - let a: [f32; 5] = [0., 1., 2., 3., 4.]; - let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(3., 4.)]; - let r: [f32x2; 2] = transmute(vld1_f32_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_f32_x2() { - let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; - let e: [f32x4; 2] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.)]; - let r: [f32x4; 2] = transmute(vld1q_f32_x2(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_f32_x3() { - let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.]; - let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(3., 4.), f32x2::new(5., 6.)]; - let r: [f32x2; 3] = transmute(vld1_f32_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_f32_x3() { - let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; - let e: [f32x4; 3] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.), f32x4::new(9., 10., 11., 12.)]; - let r: [f32x4; 3] = transmute(vld1q_f32_x3(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1_f32_x4() { - let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; - let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(3., 4.), f32x2::new(5., 6.), f32x2::new(7., 8.)]; - let r: [f32x2; 4] = transmute(vld1_f32_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld1q_f32_x4() { - let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; - let e: [f32x4; 4] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.), f32x4::new(9., 10., 11., 12.), f32x4::new(13., 14., 15., 16.)]; - let r: [f32x4; 4] = transmute(vld1q_f32_x4(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_s8() { - let a: [i8; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 3, 2, 3, 4, 5), i8x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; - let r: [i8x8; 2] = transmute(vld2_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_s16() { - let a: [i16; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; - let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 3), i16x4::new(2, 3, 4, 5)]; - let r: [i16x4; 2] = transmute(vld2_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_s32() { - let a: 
[i32; 5] = [0, 1, 2, 2, 3]; - let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(2, 3)]; - let r: [i32x2; 2] = transmute(vld2_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_s8() { - let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9), i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]; - let r: [i8x16; 2] = transmute(vld2q_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_s16() { - let a: [i16; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 3, 2, 3, 4, 5), i16x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; - let r: [i16x8; 2] = transmute(vld2q_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_s32() { - let a: [i32; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; - let e: [i32x4; 2] = [i32x4::new(1, 2, 2, 3), i32x4::new(2, 3, 4, 5)]; - let r: [i32x4; 2] = transmute(vld2q_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_s64() { - let a: [i64; 3] = [0, 1, 2]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; - let r: [i64x1; 2] = transmute(vld2_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_u8() { - let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; - let e: [u8x8; 2] = [u8x8::new(1, 2, 2, 3, 2, 3, 4, 5), u8x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; - let r: [u8x8; 2] = transmute(vld2_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_u16() { - let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; - let e: [u16x4; 2] = [u16x4::new(1, 2, 2, 3), u16x4::new(2, 3, 4, 5)]; - let r: [u16x4; 2] = transmute(vld2_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_u32() { - let a: [u32; 5] = [0, 1, 2, 2, 3]; - let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(2, 3)]; - let r: [u32x2; 2] = transmute(vld2_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_u8() { - let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [u8x16; 2] = [u8x16::new(1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9), u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]; - let r: [u8x16; 2] = transmute(vld2q_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_u16() { - let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; - let e: [u16x8; 2] = [u16x8::new(1, 2, 2, 3, 2, 3, 4, 5), u16x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; - let r: [u16x8; 2] = transmute(vld2q_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_u32() { - let a: [u32; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; - let e: [u32x4; 2] = [u32x4::new(1, 2, 2, 3), u32x4::new(2, 3, 4, 5)]; - let r: [u32x4; 2] = transmute(vld2q_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_p8() { - let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 3, 2, 3, 4, 5), i8x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; - let r: [i8x8; 2] = 
transmute(vld2_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_p16() { - let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; - let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 3), i16x4::new(2, 3, 4, 5)]; - let r: [i16x4; 2] = transmute(vld2_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_p8() { - let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9), i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]; - let r: [i8x16; 2] = transmute(vld2q_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_p16() { - let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 3, 2, 3, 4, 5), i16x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; - let r: [i16x8; 2] = transmute(vld2q_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_u64() { - let a: [u64; 3] = [0, 1, 2]; - let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)]; - let r: [u64x1; 2] = transmute(vld2_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_p64() { - let a: [u64; 3] = [0, 1, 2]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; - let r: [i64x1; 2] = transmute(vld2_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_f32() { - let a: [f32; 5] = [0., 1., 2., 2., 3.]; - let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(2., 3.)]; - let r: [f32x2; 2] = transmute(vld2_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_f32() { - let a: [f32; 9] = [0., 1., 2., 2., 3., 2., 4., 3., 5.]; - let e: [f32x4; 2] = [f32x4::new(1., 2., 2., 3.), f32x4::new(2., 3., 4., 5.)]; - let r: [f32x4; 2] = transmute(vld2q_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_s8() { - let a: [i8; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x8; 2] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x8; 2] = transmute(vld2_dup_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_s16() { - let a: [i16; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; - let e: [i16x4; 2] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; - let r: [i16x4; 2] = transmute(vld2_dup_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_s32() { - let a: [i32; 5] = [0, 1, 1, 2, 3]; - let e: [i32x2; 2] = [i32x2::new(1, 1), i32x2::new(1, 1)]; - let r: [i32x2; 2] = transmute(vld2_dup_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_s8() { - let a: [i8; 33] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [i8x16; 2] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x16; 2] = transmute(vld2q_dup_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_s16() { - let a: [i16; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x8; 2] = [i16x8::new(1, 1, 1, 1, 
1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i16x8; 2] = transmute(vld2q_dup_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_s32() { - let a: [i32; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; - let e: [i32x4; 2] = [i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1)]; - let r: [i32x4; 2] = transmute(vld2q_dup_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_s64() { - let a: [i64; 3] = [0, 1, 1]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(1)]; - let r: [i64x1; 2] = transmute(vld2_dup_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_u8() { - let a: [u8; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; - let e: [u8x8; 2] = [u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u8x8; 2] = transmute(vld2_dup_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_u16() { - let a: [u16; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; - let e: [u16x4; 2] = [u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1)]; - let r: [u16x4; 2] = transmute(vld2_dup_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_u32() { - let a: [u32; 5] = [0, 1, 1, 2, 3]; - let e: [u32x2; 2] = [u32x2::new(1, 1), u32x2::new(1, 1)]; - let r: [u32x2; 2] = transmute(vld2_dup_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_u8() { - let a: [u8; 33] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [u8x16; 2] = [u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u8x16; 2] = transmute(vld2q_dup_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_u16() { - let a: [u16; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; - let e: [u16x8; 2] = [u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u16x8; 2] = transmute(vld2q_dup_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_u32() { - let a: [u32; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; - let e: [u32x4; 2] = [u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1)]; - let r: [u32x4; 2] = transmute(vld2q_dup_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_p8() { - let a: [u8; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x8; 2] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x8; 2] = transmute(vld2_dup_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_p16() { - let a: [u16; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; - let e: [i16x4; 2] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; - let r: [i16x4; 2] = transmute(vld2_dup_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_p8() { - let a: [u8; 33] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [i8x16; 2] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x16; 2] = transmute(vld2q_dup_p8(a[1..].as_ptr())); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_p16() { - let a: [u16; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x8; 2] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i16x8; 2] = transmute(vld2q_dup_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_u64() { - let a: [u64; 3] = [0, 1, 1]; - let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(1)]; - let r: [u64x1; 2] = transmute(vld2_dup_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_p64() { - let a: [u64; 3] = [0, 1, 1]; - let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(1)]; - let r: [i64x1; 2] = transmute(vld2_dup_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_dup_f32() { - let a: [f32; 5] = [0., 1., 1., 2., 3.]; - let e: [f32x2; 2] = [f32x2::new(1., 1.), f32x2::new(1., 1.)]; - let r: [f32x2; 2] = transmute(vld2_dup_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_dup_f32() { - let a: [f32; 9] = [0., 1., 1., 2., 3., 1., 4., 3., 5.]; - let e: [f32x4; 2] = [f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.)]; - let r: [f32x4; 2] = transmute(vld2q_dup_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_s8() { - let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x8; 2] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i8x8; 2] = transmute(vld2_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_s16() { - let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x4; 2] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; - let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; - let r: [i16x4; 2] = transmute(vld2_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_s32() { - let a: [i32; 5] = [0, 1, 2, 3, 4]; - let b: [i32x2; 2] = [i32x2::new(0, 2), i32x2::new(2, 14)]; - let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(2, 14)]; - let r: [i32x2; 2] = transmute(vld2_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_s16() { - let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x8; 2] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i16x8; 2] = transmute(vld2q_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_s32() { - let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i32x4; 2] = [i32x4::new(0, 2, 2, 14), i32x4::new(2, 16, 17, 18)]; - let e: [i32x4; 2] = [i32x4::new(1, 2, 2, 14), i32x4::new(2, 16, 17, 18)]; - let r: [i32x4; 2] = transmute(vld2q_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_u8() { - let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let 
b: [u8x8; 2] = [u8x8::new(0, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [u8x8; 2] = [u8x8::new(1, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [u8x8; 2] = transmute(vld2_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_u16() { - let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u16x4; 2] = [u16x4::new(0, 2, 2, 14), u16x4::new(2, 16, 17, 18)]; - let e: [u16x4; 2] = [u16x4::new(1, 2, 2, 14), u16x4::new(2, 16, 17, 18)]; - let r: [u16x4; 2] = transmute(vld2_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_u32() { - let a: [u32; 5] = [0, 1, 2, 3, 4]; - let b: [u32x2; 2] = [u32x2::new(0, 2), u32x2::new(2, 14)]; - let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(2, 14)]; - let r: [u32x2; 2] = transmute(vld2_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_u16() { - let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u16x8; 2] = [u16x8::new(0, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [u16x8; 2] = [u16x8::new(1, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [u16x8; 2] = transmute(vld2q_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_u32() { - let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u32x4; 2] = [u32x4::new(0, 2, 2, 14), u32x4::new(2, 16, 17, 18)]; - let e: [u32x4; 2] = [u32x4::new(1, 2, 2, 14), u32x4::new(2, 16, 17, 18)]; - let r: [u32x4; 2] = transmute(vld2q_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_p8() { - let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x8; 2] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i8x8; 2] = transmute(vld2_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_p16() { - let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x4; 2] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; - let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; - let r: [i16x4; 2] = transmute(vld2_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2q_lane_p16() { - let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x8; 2] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i16x8; 2] = transmute(vld2q_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld2_lane_f32() { - let a: [f32; 5] = [0., 1., 2., 3., 4.]; - let b: [f32x2; 2] = [f32x2::new(0., 2.), f32x2::new(2., 14.)]; - let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(2., 14.)]; - let r: [f32x2; 2] = transmute(vld2_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
unsafe fn test_vld2q_lane_f32() { - let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; - let b: [f32x4; 2] = [f32x4::new(0., 2., 2., 14.), f32x4::new(2., 16., 17., 18.)]; - let e: [f32x4; 2] = [f32x4::new(1., 2., 2., 14.), f32x4::new(2., 16., 17., 18.)]; - let r: [f32x4; 2] = transmute(vld2q_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_s8() { - let a: [i8; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; - let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 4, 2, 4, 7, 8), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i8x8; 3] = transmute(vld3_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_s16() { - let a: [i16; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; - let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 4), i16x4::new(2, 4, 7, 8), i16x4::new(2, 4, 7, 8)]; - let r: [i16x4; 3] = transmute(vld3_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_s32() { - let a: [i32; 7] = [0, 1, 2, 2, 2, 4, 4]; - let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(2, 4), i32x2::new(2, 4)]; - let r: [i32x2; 3] = transmute(vld3_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_s8() { - let a: [i8; 49] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48)]; - let r: [i8x16; 3] = transmute(vld3q_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_s16() { - let a: [i16; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; - let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 4, 2, 4, 7, 8), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i16x8; 3] = transmute(vld3q_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_s32() { - let a: [i32; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; - let e: [i32x4; 3] = [i32x4::new(1, 2, 2, 4), i32x4::new(2, 4, 7, 8), i32x4::new(2, 4, 7, 8)]; - let r: [i32x4; 3] = transmute(vld3q_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_s64() { - let a: [i64; 4] = [0, 1, 2, 2]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 3] = transmute(vld3_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_u8() { - let a: [u8; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; - let e: [u8x8; 3] = [u8x8::new(1, 2, 2, 4, 2, 4, 7, 8), u8x8::new(2, 4, 7, 8, 13, 14, 15, 16), u8x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [u8x8; 3] = transmute(vld3_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_u16() { - let a: [u16; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; - let e: [u16x4; 3] = [u16x4::new(1, 2, 2, 4), u16x4::new(2, 4, 7, 8), u16x4::new(2, 4, 7, 8)]; - let r: [u16x4; 3] = transmute(vld3_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } 
- - #[simd_test(enable = "neon")] - unsafe fn test_vld3_u32() { - let a: [u32; 7] = [0, 1, 2, 2, 2, 4, 4]; - let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(2, 4), u32x2::new(2, 4)]; - let r: [u32x2; 3] = transmute(vld3_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_u8() { - let a: [u8; 49] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; - let e: [u8x16; 3] = [u8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16), u8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48)]; - let r: [u8x16; 3] = transmute(vld3q_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_u16() { - let a: [u16; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; - let e: [u16x8; 3] = [u16x8::new(1, 2, 2, 4, 2, 4, 7, 8), u16x8::new(2, 4, 7, 8, 13, 14, 15, 16), u16x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [u16x8; 3] = transmute(vld3q_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_u32() { - let a: [u32; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; - let e: [u32x4; 3] = [u32x4::new(1, 2, 2, 4), u32x4::new(2, 4, 7, 8), u32x4::new(2, 4, 7, 8)]; - let r: [u32x4; 3] = transmute(vld3q_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_p8() { - let a: [u8; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; - let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 4, 2, 4, 7, 8), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i8x8; 3] = transmute(vld3_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_p16() { - let a: [u16; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; - let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 4), i16x4::new(2, 4, 7, 8), i16x4::new(2, 4, 7, 8)]; - let r: [i16x4; 3] = transmute(vld3_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_p8() { - let a: [u8; 49] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; - let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48)]; - let r: [i8x16; 3] = transmute(vld3q_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_p16() { - let a: [u16; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; - let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 4, 2, 4, 7, 8), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; - let r: [i16x8; 3] = transmute(vld3q_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_u64() { - let a: [u64; 4] = [0, 1, 2, 2]; - let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(2)]; - let r: [u64x1; 3] = transmute(vld3_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_p64() { - let a: [u64; 4] = [0, 
1, 2, 2]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; - let r: [i64x1; 3] = transmute(vld3_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_f32() { - let a: [f32; 7] = [0., 1., 2., 2., 2., 4., 4.]; - let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(2., 4.), f32x2::new(2., 4.)]; - let r: [f32x2; 3] = transmute(vld3_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_f32() { - let a: [f32; 13] = [0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.]; - let e: [f32x4; 3] = [f32x4::new(1., 2., 2., 4.), f32x4::new(2., 4., 7., 8.), f32x4::new(2., 4., 7., 8.)]; - let r: [f32x4; 3] = transmute(vld3q_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_s8() { - let a: [i8; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; - let e: [i8x8; 3] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x8; 3] = transmute(vld3_dup_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_s16() { - let a: [i16; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; - let e: [i16x4; 3] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; - let r: [i16x4; 3] = transmute(vld3_dup_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_s32() { - let a: [i32; 7] = [0, 1, 1, 1, 3, 1, 4]; - let e: [i32x2; 3] = [i32x2::new(1, 1), i32x2::new(1, 1), i32x2::new(1, 1)]; - let r: [i32x2; 3] = transmute(vld3_dup_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_s8() { - let a: [i8; 49] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [i8x16; 3] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x16; 3] = transmute(vld3q_dup_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_s16() { - let a: [i16; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; - let e: [i16x8; 3] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i16x8; 3] = transmute(vld3q_dup_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_s32() { - let a: [i32; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; - let e: [i32x4; 3] = [i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1)]; - let r: [i32x4; 3] = transmute(vld3q_dup_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_s64() { - let a: [i64; 4] = [0, 1, 1, 1]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(1), i64x1::new(1)]; - let r: [i64x1; 3] = transmute(vld3_dup_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_u8() { - let a: [u8; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; - let e: [u8x8; 3] = [u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 
1)]; - let r: [u8x8; 3] = transmute(vld3_dup_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_u16() { - let a: [u16; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; - let e: [u16x4; 3] = [u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1)]; - let r: [u16x4; 3] = transmute(vld3_dup_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_u32() { - let a: [u32; 7] = [0, 1, 1, 1, 3, 1, 4]; - let e: [u32x2; 3] = [u32x2::new(1, 1), u32x2::new(1, 1), u32x2::new(1, 1)]; - let r: [u32x2; 3] = transmute(vld3_dup_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_u8() { - let a: [u8; 49] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [u8x16; 3] = [u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u8x16; 3] = transmute(vld3q_dup_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_u16() { - let a: [u16; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; - let e: [u16x8; 3] = [u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u16x8; 3] = transmute(vld3q_dup_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_u32() { - let a: [u32; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; - let e: [u32x4; 3] = [u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1)]; - let r: [u32x4; 3] = transmute(vld3q_dup_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_p8() { - let a: [u8; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; - let e: [i8x8; 3] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x8; 3] = transmute(vld3_dup_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_p16() { - let a: [u16; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; - let e: [i16x4; 3] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; - let r: [i16x4; 3] = transmute(vld3_dup_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_p8() { - let a: [u8; 49] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17]; - let e: [i8x16; 3] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x16; 3] = transmute(vld3q_dup_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_p16() { - let a: [u16; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; - let e: [i16x8; 3] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i16x8; 3] = transmute(vld3q_dup_p16(a[1..].as_ptr())); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_u64() { - let a: [u64; 4] = [0, 1, 1, 1]; - let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(1), u64x1::new(1)]; - let r: [u64x1; 3] = transmute(vld3_dup_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_p64() { - let a: [u64; 4] = [0, 1, 1, 1]; - let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(1), i64x1::new(1)]; - let r: [i64x1; 3] = transmute(vld3_dup_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_dup_f32() { - let a: [f32; 7] = [0., 1., 1., 1., 3., 1., 4.]; - let e: [f32x2; 3] = [f32x2::new(1., 1.), f32x2::new(1., 1.), f32x2::new(1., 1.)]; - let r: [f32x2; 3] = transmute(vld3_dup_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_dup_f32() { - let a: [f32; 13] = [0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.]; - let e: [f32x4; 3] = [f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.)]; - let r: [f32x4; 3] = transmute(vld3q_dup_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_s8() { - let a: [i8; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x8; 3] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; - let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; - let r: [i8x8; 3] = transmute(vld3_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_s16() { - let a: [i16; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; - let b: [i16x4; 3] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; - let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; - let r: [i16x4; 3] = transmute(vld3_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_s32() { - let a: [i32; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [i32x2; 3] = [i32x2::new(0, 2), i32x2::new(2, 14), i32x2::new(2, 16)]; - let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(2, 14), i32x2::new(2, 16)]; - let r: [i32x2; 3] = transmute(vld3_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_s16() { - let a: [i16; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x8; 3] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; - let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; - let r: [i16x8; 3] = transmute(vld3q_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_s32() { - let a: [i32; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; - let b: [i32x4; 3] = [i32x4::new(0, 2, 2, 14), i32x4::new(2, 16, 17, 18), i32x4::new(2, 20, 21, 22)]; - let e: [i32x4; 3] = [i32x4::new(1, 2, 2, 14), i32x4::new(2, 16, 17, 18), i32x4::new(2, 20, 21, 22)]; - let r: [i32x4; 3] = transmute(vld3q_lane_s32::<0>(a[1..].as_ptr(), 
transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_u8() { - let a: [u8; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u8x8; 3] = [u8x8::new(0, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26), u8x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; - let e: [u8x8; 3] = [u8x8::new(1, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26), u8x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; - let r: [u8x8; 3] = transmute(vld3_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_u16() { - let a: [u16; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; - let b: [u16x4; 3] = [u16x4::new(0, 2, 2, 14), u16x4::new(2, 16, 17, 18), u16x4::new(2, 20, 21, 22)]; - let e: [u16x4; 3] = [u16x4::new(1, 2, 2, 14), u16x4::new(2, 16, 17, 18), u16x4::new(2, 20, 21, 22)]; - let r: [u16x4; 3] = transmute(vld3_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_u32() { - let a: [u32; 7] = [0, 1, 2, 2, 4, 5, 6]; - let b: [u32x2; 3] = [u32x2::new(0, 2), u32x2::new(2, 14), u32x2::new(2, 16)]; - let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(2, 14), u32x2::new(2, 16)]; - let r: [u32x2; 3] = transmute(vld3_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_u16() { - let a: [u16; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [u16x8; 3] = [u16x8::new(0, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26), u16x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; - let e: [u16x8; 3] = [u16x8::new(1, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26), u16x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; - let r: [u16x8; 3] = transmute(vld3q_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_u32() { - let a: [u32; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; - let b: [u32x4; 3] = [u32x4::new(0, 2, 2, 14), u32x4::new(2, 16, 17, 18), u32x4::new(2, 20, 21, 22)]; - let e: [u32x4; 3] = [u32x4::new(1, 2, 2, 14), u32x4::new(2, 16, 17, 18), u32x4::new(2, 20, 21, 22)]; - let r: [u32x4; 3] = transmute(vld3q_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_p8() { - let a: [u8; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x8; 3] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; - let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; - let r: [i8x8; 3] = transmute(vld3_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_p16() { - let a: [u16; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; - let b: [i16x4; 3] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; - let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; - let r: [i16x4; 3] = transmute(vld3_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_p16() { - let a: [u16; 25] = 
[0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x8; 3] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; - let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; - let r: [i16x8; 3] = transmute(vld3q_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3_lane_f32() { - let a: [f32; 7] = [0., 1., 2., 2., 4., 5., 6.]; - let b: [f32x2; 3] = [f32x2::new(0., 2.), f32x2::new(2., 14.), f32x2::new(9., 16.)]; - let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(2., 14.), f32x2::new(2., 16.)]; - let r: [f32x2; 3] = transmute(vld3_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld3q_lane_f32() { - let a: [f32; 13] = [0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.]; - let b: [f32x4; 3] = [f32x4::new(0., 2., 2., 14.), f32x4::new(9., 16., 17., 18.), f32x4::new(5., 6., 7., 8.)]; - let e: [f32x4; 3] = [f32x4::new(1., 2., 2., 14.), f32x4::new(2., 16., 17., 18.), f32x4::new(2., 6., 7., 8.)]; - let r: [f32x4; 3] = transmute(vld3q_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_s8() { - let a: [i8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; - let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 6, 2, 6, 6, 8), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; - let r: [i8x8; 4] = transmute(vld4_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_s16() { - let a: [i16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; - let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 6), i16x4::new(2, 6, 6, 8), i16x4::new(2, 6, 6, 8), i16x4::new(6, 8, 8, 16)]; - let r: [i16x4; 4] = transmute(vld4_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_s32() { - let a: [i32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(2, 6), i32x2::new(2, 6), i32x2::new(6, 8)]; - let r: [i32x2; 4] = transmute(vld4_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_s8() { - let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48), i8x16::new(6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64)]; - let r: [i8x16; 4] = transmute(vld4q_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_s16() { - let a: [i16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; - let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 6, 2, 6, 6, 8), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; - let r: [i16x8; 4] = transmute(vld4q_s16(a[1..].as_ptr())); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_s32() { - let a: [i32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; - let e: [i32x4; 4] = [i32x4::new(1, 2, 2, 6), i32x4::new(2, 6, 6, 8), i32x4::new(2, 6, 6, 8), i32x4::new(6, 8, 8, 16)]; - let r: [i32x4; 4] = transmute(vld4q_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_s64() { - let a: [i64; 5] = [0, 1, 2, 2, 6]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(6)]; - let r: [i64x1; 4] = transmute(vld4_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_u8() { - let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; - let e: [u8x8; 4] = [u8x8::new(1, 2, 2, 6, 2, 6, 6, 8), u8x8::new(2, 6, 6, 8, 6, 8, 8, 16), u8x8::new(2, 6, 6, 8, 6, 8, 8, 16), u8x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; - let r: [u8x8; 4] = transmute(vld4_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_u16() { - let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; - let e: [u16x4; 4] = [u16x4::new(1, 2, 2, 6), u16x4::new(2, 6, 6, 8), u16x4::new(2, 6, 6, 8), u16x4::new(6, 8, 8, 16)]; - let r: [u16x4; 4] = transmute(vld4_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_u32() { - let a: [u32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; - let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(2, 6), u32x2::new(2, 6), u32x2::new(6, 8)]; - let r: [u32x2; 4] = transmute(vld4_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_u8() { - let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; - let e: [u8x16; 4] = [u8x16::new(1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16), u8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32), u8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48), u8x16::new(6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64)]; - let r: [u8x16; 4] = transmute(vld4q_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_u16() { - let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; - let e: [u16x8; 4] = [u16x8::new(1, 2, 2, 6, 2, 6, 6, 8), u16x8::new(2, 6, 6, 8, 6, 8, 8, 16), u16x8::new(2, 6, 6, 8, 6, 8, 8, 16), u16x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; - let r: [u16x8; 4] = transmute(vld4q_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_u32() { - let a: [u32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; - let e: [u32x4; 4] = [u32x4::new(1, 2, 2, 6), u32x4::new(2, 6, 6, 8), u32x4::new(2, 6, 6, 8), u32x4::new(6, 8, 8, 16)]; - let r: [u32x4; 4] = transmute(vld4q_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_p8() { - let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; - let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 6, 2, 6, 6, 8), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; - let 
r: [i8x8; 4] = transmute(vld4_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_p16() { - let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; - let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 6), i16x4::new(2, 6, 6, 8), i16x4::new(2, 6, 6, 8), i16x4::new(6, 8, 8, 16)]; - let r: [i16x4; 4] = transmute(vld4_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_p8() { - let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; - let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48), i8x16::new(6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64)]; - let r: [i8x16; 4] = transmute(vld4q_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_p16() { - let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; - let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 6, 2, 6, 6, 8), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; - let r: [i16x8; 4] = transmute(vld4q_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_u64() { - let a: [u64; 5] = [0, 1, 2, 2, 6]; - let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(2), u64x1::new(6)]; - let r: [u64x1; 4] = transmute(vld4_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_p64() { - let a: [u64; 5] = [0, 1, 2, 2, 6]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(6)]; - let r: [i64x1; 4] = transmute(vld4_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_f32() { - let a: [f32; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; - let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(2., 6.), f32x2::new(2., 6.), f32x2::new(6., 8.)]; - let r: [f32x2; 4] = transmute(vld4_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_f32() { - let a: [f32; 17] = [0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.]; - let e: [f32x4; 4] = [f32x4::new(1., 2., 2., 6.), f32x4::new(2., 6., 6., 8.), f32x4::new(2., 6., 6., 15.), f32x4::new(6., 8., 8., 16.)]; - let r: [f32x4; 4] = transmute(vld4q_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_s8() { - let a: [i8; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x8; 4] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x8; 4] = transmute(vld4_dup_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_s16() { - let a: [i16; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x4; 4] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; - let r: [i16x4; 4] = transmute(vld4_dup_s16(a[1..].as_ptr())); - assert_eq!(r, 
e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_s32() { - let a: [i32; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [i32x2; 4] = [i32x2::new(1, 1), i32x2::new(1, 1), i32x2::new(1, 1), i32x2::new(1, 1)]; - let r: [i32x2; 4] = transmute(vld4_dup_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_s8() { - let a: [i8; 65] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x16; 4] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x16; 4] = transmute(vld4q_dup_s8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_s16() { - let a: [i16; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x8; 4] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i16x8; 4] = transmute(vld4q_dup_s16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_s32() { - let a: [i32; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i32x4; 4] = [i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1)]; - let r: [i32x4; 4] = transmute(vld4q_dup_s32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_s64() { - let a: [i64; 5] = [0, 1, 1, 1, 1]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(1), i64x1::new(1), i64x1::new(1)]; - let r: [i64x1; 4] = transmute(vld4_dup_s64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_u8() { - let a: [u8; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [u8x8; 4] = [u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u8x8; 4] = transmute(vld4_dup_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_u16() { - let a: [u16; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [u16x4; 4] = [u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1)]; - let r: [u16x4; 4] = transmute(vld4_dup_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_u32() { - let a: [u32; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; - let e: [u32x2; 4] = [u32x2::new(1, 1), u32x2::new(1, 1), u32x2::new(1, 1), u32x2::new(1, 1)]; - let r: [u32x2; 4] = transmute(vld4_dup_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_u8() { - let a: [u8; 65] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [u8x16; 4] = [u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u8x16; 4] = transmute(vld4q_dup_u8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_u16() { - let a: [u16; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [u16x8; 4] = [u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [u16x8; 4] = transmute(vld4q_dup_u16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_u32() { - let a: [u32; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [u32x4; 4] = [u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1)]; - let r: [u32x4; 4] = transmute(vld4q_dup_u32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_p8() { - let a: [u8; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x8; 4] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x8; 4] = transmute(vld4_dup_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_p16() { - let a: [u16; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x4; 4] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; - let r: [i16x4; 4] = transmute(vld4_dup_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_p8() { - let a: [u8; 65] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i8x16; 4] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i8x16; 4] = transmute(vld4q_dup_p8(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_p16() { - let a: [u16; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; - let e: [i16x8; 4] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; - let r: [i16x8; 4] = transmute(vld4q_dup_p16(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_u64() { - let a: [u64; 5] = [0, 1, 1, 1, 1]; - let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(1), u64x1::new(1), u64x1::new(1)]; - let r: [u64x1; 4] = transmute(vld4_dup_u64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_p64() { - let a: [u64; 5] = [0, 1, 1, 1, 1]; - let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(1), i64x1::new(1), i64x1::new(1)]; - let r: [i64x1; 4] = transmute(vld4_dup_p64(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_dup_f32() { - let a: [f32; 9] = [0., 1., 1., 1., 1., 6., 4., 3., 5.]; - let 
e: [f32x2; 4] = [f32x2::new(1., 1.), f32x2::new(1., 1.), f32x2::new(1., 1.), f32x2::new(1., 1.)]; - let r: [f32x2; 4] = transmute(vld4_dup_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_dup_f32() { - let a: [f32; 17] = [0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5.]; - let e: [f32x4; 4] = [f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.)]; - let r: [f32x4; 4] = transmute(vld4q_dup_f32(a[1..].as_ptr())); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_s8() { - let a: [i8; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i8x8; 4] = [i8x8::new(0, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i8x8; 4] = transmute(vld4_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_s16() { - let a: [i16; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x4; 4] = [i16x4::new(0, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)]; - let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)]; - let r: [i16x4; 4] = transmute(vld4_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_s32() { - let a: [i32; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; - let b: [i32x2; 4] = [i32x2::new(0, 2), i32x2::new(2, 2), i32x2::new(2, 16), i32x2::new(2, 18)]; - let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(2, 2), i32x2::new(2, 16), i32x2::new(2, 18)]; - let r: [i32x2; 4] = transmute(vld4_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_s16() { - let a: [i16; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i16x8; 4] = [i16x8::new(0, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; - let r: [i16x8; 4] = transmute(vld4q_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4q_lane_s32() { - let a: [i32; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: [i32x4; 4] = [i32x4::new(0, 2, 2, 2), i32x4::new(2, 16, 2, 18), i32x4::new(2, 20, 21, 22), i32x4::new(2, 24, 25, 26)]; - let e: [i32x4; 4] = [i32x4::new(1, 2, 2, 2), i32x4::new(2, 16, 2, 18), i32x4::new(2, 20, 21, 22), i32x4::new(2, 24, 25, 26)]; - let r: [i32x4; 4] = transmute(vld4q_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vld4_lane_u8() { - let a: [u8; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; - let b: 
-        let b: [u8x8; 4] = [u8x8::new(0, 2, 2, 2, 2, 16, 2, 18), u8x8::new(2, 20, 21, 22, 2, 24, 25, 26), u8x8::new(11, 12, 13, 14, 15, 16, 2, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [u8x8; 4] = [u8x8::new(1, 2, 2, 2, 2, 16, 2, 18), u8x8::new(2, 20, 21, 22, 2, 24, 25, 26), u8x8::new(2, 12, 13, 14, 15, 16, 2, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [u8x8; 4] = transmute(vld4_lane_u8::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4_lane_u16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [u16x4; 4] = [u16x4::new(0, 2, 2, 2), u16x4::new(2, 16, 2, 18), u16x4::new(2, 20, 21, 22), u16x4::new(2, 24, 25, 26)];
-        let e: [u16x4; 4] = [u16x4::new(1, 2, 2, 2), u16x4::new(2, 16, 2, 18), u16x4::new(2, 20, 21, 22), u16x4::new(2, 24, 25, 26)];
-        let r: [u16x4; 4] = transmute(vld4_lane_u16::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4_lane_u32() {
-        let a: [u32; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
-        let b: [u32x2; 4] = [u32x2::new(0, 2), u32x2::new(2, 2), u32x2::new(2, 16), u32x2::new(2, 18)];
-        let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(2, 2), u32x2::new(2, 16), u32x2::new(2, 18)];
-        let r: [u32x2; 4] = transmute(vld4_lane_u32::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4q_lane_u16() {
-        let a: [u16; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [u16x8; 4] = [u16x8::new(0, 2, 2, 2, 2, 16, 2, 18), u16x8::new(2, 20, 21, 22, 2, 24, 25, 26), u16x8::new(11, 12, 13, 14, 15, 16, 2, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [u16x8; 4] = [u16x8::new(1, 2, 2, 2, 2, 16, 2, 18), u16x8::new(2, 20, 21, 22, 2, 24, 25, 26), u16x8::new(2, 12, 13, 14, 15, 16, 2, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [u16x8; 4] = transmute(vld4q_lane_u16::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4q_lane_u32() {
-        let a: [u32; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [u32x4; 4] = [u32x4::new(0, 2, 2, 2), u32x4::new(2, 16, 2, 18), u32x4::new(2, 20, 21, 22), u32x4::new(2, 24, 25, 26)];
-        let e: [u32x4; 4] = [u32x4::new(1, 2, 2, 2), u32x4::new(2, 16, 2, 18), u32x4::new(2, 20, 21, 22), u32x4::new(2, 24, 25, 26)];
-        let r: [u32x4; 4] = transmute(vld4q_lane_u32::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4_lane_p8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [i8x8; 4] = [i8x8::new(0, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [i8x8; 4] = transmute(vld4_lane_p8::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4_lane_p16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [i16x4; 4] = [i16x4::new(0, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)];
-        let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)];
-        let r: [i16x4; 4] = transmute(vld4_lane_p16::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4q_lane_p16() {
-        let a: [u16; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8];
-        let b: [i16x8; 4] = [i16x8::new(0, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)];
-        let r: [i16x8; 4] = transmute(vld4q_lane_p16::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4_lane_f32() {
-        let a: [f32; 9] = [0., 1., 2., 2., 2., 5., 6., 7., 8.];
-        let b: [f32x2; 4] = [f32x2::new(0., 2.), f32x2::new(2., 2.), f32x2::new(2., 16.), f32x2::new(2., 18.)];
-        let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(2., 2.), f32x2::new(2., 16.), f32x2::new(2., 18.)];
-        let r: [f32x2; 4] = transmute(vld4_lane_f32::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld4q_lane_f32() {
-        let a: [f32; 17] = [0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.];
-        let b: [f32x4; 4] = [f32x4::new(0., 2., 2., 2.), f32x4::new(2., 16., 2., 18.), f32x4::new(5., 6., 7., 8.), f32x4::new(1., 4., 3., 5.)];
-        let e: [f32x4; 4] = [f32x4::new(1., 2., 2., 2.), f32x4::new(2., 16., 2., 18.), f32x4::new(2., 6., 7., 8.), f32x4::new(2., 4., 3., 5.)];
-        let r: [f32x4; 4] = transmute(vld4q_lane_f32::<0>(a[1..].as_ptr(), transmute(b)));
-        assert_eq!(r, e);
-    }
-
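Another aside: unlike `vld4_dup`, the `vld4_lane` tests pass both a pointer and four existing vectors. One four-element structure is loaded and written into lane `LANE` of each vector; every other lane is passed through unchanged. A scalar sketch of that behaviour (`vld4_lane_model` is a hypothetical helper):

```rust
/// Scalar model of vld4_lane::<LANE>: overwrite lane LANE of each of
/// the four vectors with the four consecutive elements at `src`.
fn vld4_lane_model<T: Copy, const N: usize, const LANE: usize>(
    src: &[T],
    mut b: [[T; N]; 4],
) -> [[T; N]; 4] {
    for i in 0..4 {
        b[i][LANE] = src[i];
    }
    b
}

fn main() {
    // Mirrors test_vld4_lane_s32: lane 0 of each vector becomes
    // a[1..5] = [1, 2, 2, 2]; the second lanes are untouched.
    let a: [i32; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8];
    let b = [[0, 2], [2, 2], [2, 16], [2, 18]];
    let r = vld4_lane_model::<i32, 2, 0>(&a[1..], b);
    assert_eq!(r, [[1, 2], [2, 2], [2, 16], [2, 18]]);
}
```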
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_s8() {
-        let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [i8; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i8; 8] = [0i8; 8];
-        vst1_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_s16() {
-        let a: [i16; 5] = [0, 1, 2, 3, 4];
-        let e: [i16; 4] = [1, 0, 0, 0];
-        let mut r: [i16; 4] = [0i16; 4];
-        vst1_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_s32() {
-        let a: [i32; 3] = [0, 1, 2];
-        let e: [i32; 2] = [1, 0];
-        let mut r: [i32; 2] = [0i32; 2];
-        vst1_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_s64() {
-        let a: [i64; 2] = [0, 1];
-        let e: [i64; 1] = [1];
-        let mut r: [i64; 1] = [0i64; 1];
-        vst1_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_s8() {
-        let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [i8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i8; 16] = [0i8; 16];
-        vst1q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_s16() {
-        let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [i16; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i16; 8] = [0i16; 8];
-        vst1q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_s32() {
-        let a: [i32; 5] = [0, 1, 2, 3, 4];
-        let e: [i32; 4] = [1, 0, 0, 0];
-        let mut r: [i32; 4] = [0i32; 4];
-        vst1q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_s64() {
-        let a: [i64; 3] = [0, 1, 2];
-        let e: [i64; 2] = [1, 0];
-        let mut r: [i64; 2] = [0i64; 2];
-        vst1q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_u8() {
-        let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u8; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 8] = [0u8; 8];
-        vst1_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_u16() {
-        let a: [u16; 5] = [0, 1, 2, 3, 4];
-        let e: [u16; 4] = [1, 0, 0, 0];
-        let mut r: [u16; 4] = [0u16; 4];
-        vst1_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_u32() {
-        let a: [u32; 3] = [0, 1, 2];
-        let e: [u32; 2] = [1, 0];
-        let mut r: [u32; 2] = [0u32; 2];
-        vst1_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_u64() {
-        let a: [u64; 2] = [0, 1];
-        let e: [u64; 1] = [1];
-        let mut r: [u64; 1] = [0u64; 1];
-        vst1_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_u8() {
-        let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst1q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_u16() {
-        let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u16; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst1q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_u32() {
-        let a: [u32; 5] = [0, 1, 2, 3, 4];
-        let e: [u32; 4] = [1, 0, 0, 0];
-        let mut r: [u32; 4] = [0u32; 4];
-        vst1q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_u64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 0];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst1q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_p8() {
-        let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u8; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 8] = [0u8; 8];
-        vst1_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_p16() {
-        let a: [u16; 5] = [0, 1, 2, 3, 4];
-        let e: [u16; 4] = [1, 0, 0, 0];
-        let mut r: [u16; 4] = [0u16; 4];
-        vst1_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_p8() {
-        let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst1q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_p16() {
-        let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u16; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst1q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_p64() {
-        let a: [u64; 2] = [0, 1];
-        let e: [u64; 1] = [1];
-        let mut r: [u64; 1] = [0u64; 1];
-        vst1_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_p64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 0];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst1q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_lane_f32() {
-        let a: [f32; 3] = [0., 1., 2.];
-        let e: [f32; 2] = [1., 0.];
-        let mut r: [f32; 2] = [0f32; 2];
-        vst1_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_lane_f32() {
-        let a: [f32; 5] = [0., 1., 2., 3., 4.];
-        let e: [f32; 4] = [1., 0., 0., 0.];
-        let mut r: [f32; 4] = [0f32; 4];
-        vst1q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
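The `vst1_lane` tests above are the store-side mirror of the lane loads: exactly one lane is written to memory and the rest of the destination buffer must stay untouched, which is why every expected array is a single value followed by zeros. Modeled in scalar Rust (hypothetical helper):

```rust
/// Scalar model of vst1_lane::<LANE>: write lane LANE of `v` to dst[0],
/// leaving the rest of the destination untouched.
fn vst1_lane_model<T: Copy, const N: usize, const LANE: usize>(dst: &mut [T], v: [T; N]) {
    dst[0] = v[LANE];
}

fn main() {
    let v: [u32; 4] = [1, 2, 3, 4];
    let mut r = [0u32; 4];
    vst1_lane_model::<u32, 4, 0>(&mut r, v);
    // Only the first element is written, as the tests above assert.
    assert_eq!(r, [1, 0, 0, 0]);
}
```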
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s8_x2() {
-        let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [i8; 16] = [0i8; 16];
-        vst1_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s16_x2() {
-        let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [i16; 8] = [0i16; 8];
-        vst1_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s32_x2() {
-        let a: [i32; 5] = [0, 1, 2, 3, 4];
-        let e: [i32; 4] = [1, 2, 3, 4];
-        let mut r: [i32; 4] = [0i32; 4];
-        vst1_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s64_x2() {
-        let a: [i64; 3] = [0, 1, 2];
-        let e: [i64; 2] = [1, 2];
-        let mut r: [i64; 2] = [0i64; 2];
-        vst1_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s8_x2() {
-        let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [i8; 32] = [0i8; 32];
-        vst1q_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s16_x2() {
-        let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [i16; 16] = [0i16; 16];
-        vst1q_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s32_x2() {
-        let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [i32; 8] = [0i32; 8];
-        vst1q_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s64_x2() {
-        let a: [i64; 5] = [0, 1, 2, 3, 4];
-        let e: [i64; 4] = [1, 2, 3, 4];
-        let mut r: [i64; 4] = [0i64; 4];
-        vst1q_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s8_x3() {
-        let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let e: [i8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let mut r: [i8; 24] = [0i8; 24];
-        vst1_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s16_x3() {
-        let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let e: [i16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let mut r: [i16; 12] = [0i16; 12];
-        vst1_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s32_x3() {
-        let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6];
-        let e: [i32; 6] = [1, 2, 3, 4, 5, 6];
-        let mut r: [i32; 6] = [0i32; 6];
-        vst1_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s64_x3() {
-        let a: [i64; 4] = [0, 1, 2, 3];
-        let e: [i64; 3] = [1, 2, 3];
-        let mut r: [i64; 3] = [0i64; 3];
-        vst1_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s8_x3() {
-        let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [i8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [i8; 48] = [0i8; 48];
-        vst1q_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s16_x3() {
-        let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let e: [i16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let mut r: [i16; 24] = [0i16; 24];
-        vst1q_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s32_x3() {
-        let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let e: [i32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let mut r: [i32; 12] = [0i32; 12];
-        vst1q_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s64_x3() {
-        let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6];
-        let e: [i64; 6] = [1, 2, 3, 4, 5, 6];
-        let mut r: [i64; 6] = [0i64; 6];
-        vst1q_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s8_x4() {
-        let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [i8; 32] = [0i8; 32];
-        vst1_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s16_x4() {
-        let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [i16; 16] = [0i16; 16];
-        vst1_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s32_x4() {
-        let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [i32; 8] = [0i32; 8];
-        vst1_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_s64_x4() {
-        let a: [i64; 5] = [0, 1, 2, 3, 4];
-        let e: [i64; 4] = [1, 2, 3, 4];
-        let mut r: [i64; 4] = [0i64; 4];
-        vst1_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s8_x4() {
-        let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [i8; 64] = [0i8; 64];
-        vst1q_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s16_x4() {
-        let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [i16; 32] = [0i16; 32];
-        vst1q_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s32_x4() {
-        let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [i32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [i32; 16] = [0i32; 16];
-        vst1q_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_s64_x4() {
-        let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [i64; 8] = [0i64; 8];
-        vst1q_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u8_x2() {
-        let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst1_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u16_x2() {
-        let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst1_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u32_x2() {
-        let a: [u32; 5] = [0, 1, 2, 3, 4];
-        let e: [u32; 4] = [1, 2, 3, 4];
-        let mut r: [u32; 4] = [0u32; 4];
-        vst1_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u64_x2() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 2];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst1_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u8_x2() {
-        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst1q_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u16_x2() {
-        let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst1q_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u32_x2() {
-        let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [u32; 8] = [0u32; 8];
-        vst1q_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u64_x2() {
-        let a: [u64; 5] = [0, 1, 2, 3, 4];
-        let e: [u64; 4] = [1, 2, 3, 4];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst1q_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u8_x3() {
-        let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let mut r: [u8; 24] = [0u8; 24];
-        vst1_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u16_x3() {
-        let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let mut r: [u16; 12] = [0u16; 12];
-        vst1_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u32_x3() {
-        let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6];
-        let e: [u32; 6] = [1, 2, 3, 4, 5, 6];
-        let mut r: [u32; 6] = [0u32; 6];
-        vst1_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u64_x3() {
-        let a: [u64; 4] = [0, 1, 2, 3];
-        let e: [u64; 3] = [1, 2, 3];
-        let mut r: [u64; 3] = [0u64; 3];
-        vst1_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u8_x3() {
-        let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u8; 48] = [0u8; 48];
-        vst1q_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u16_x3() {
-        let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let mut r: [u16; 24] = [0u16; 24];
-        vst1q_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u32_x3() {
-        let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let e: [u32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let mut r: [u32; 12] = [0u32; 12];
-        vst1q_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u64_x3() {
-        let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6];
-        let e: [u64; 6] = [1, 2, 3, 4, 5, 6];
-        let mut r: [u64; 6] = [0u64; 6];
-        vst1q_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u8_x4() {
-        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst1_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u16_x4() {
-        let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst1_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u32_x4() {
-        let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [u32; 8] = [0u32; 8];
-        vst1_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_u64_x4() {
-        let a: [u64; 5] = [0, 1, 2, 3, 4];
-        let e: [u64; 4] = [1, 2, 3, 4];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst1_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u8_x4() {
-        let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u8; 64] = [0u8; 64];
-        vst1q_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u16_x4() {
-        let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u16; 32] = [0u16; 32];
-        vst1q_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u32_x4() {
-        let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u32; 16] = [0u32; 16];
-        vst1q_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_u64_x4() {
-        let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [u64; 8] = [0u64; 8];
-        vst1q_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p8_x2() {
-        let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst1_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p8_x3() {
-        let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let mut r: [u8; 24] = [0u8; 24];
-        vst1_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p8_x4() {
-        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst1_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p8_x2() {
-        let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst1q_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p8_x3() {
-        let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u8; 48] = [0u8; 48];
-        vst1q_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p8_x4() {
-        let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u8; 64] = [0u8; 64];
-        vst1q_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p16_x2() {
-        let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst1_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p16_x3() {
-        let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
-        let mut r: [u16; 12] = [0u16; 12];
-        vst1_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p16_x4() {
-        let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst1_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p16_x2() {
-        let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst1q_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p16_x3() {
-        let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
-        let mut r: [u16; 24] = [0u16; 24];
-        vst1q_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p16_x4() {
-        let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let mut r: [u16; 32] = [0u16; 32];
-        vst1q_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p64_x2() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 2];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst1_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p64_x3() {
-        let a: [u64; 4] = [0, 1, 2, 3];
-        let e: [u64; 3] = [1, 2, 3];
-        let mut r: [u64; 3] = [0u64; 3];
-        vst1_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_p64_x4() {
-        let a: [u64; 5] = [0, 1, 2, 3, 4];
-        let e: [u64; 4] = [1, 2, 3, 4];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst1_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p64_x2() {
-        let a: [u64; 5] = [0, 1, 2, 3, 4];
-        let e: [u64; 4] = [1, 2, 3, 4];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst1q_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p64_x3() {
-        let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6];
-        let e: [u64; 6] = [1, 2, 3, 4, 5, 6];
-        let mut r: [u64; 6] = [0u64; 6];
-        vst1q_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_p64_x4() {
-        let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
-        let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let mut r: [u64; 8] = [0u64; 8];
-        vst1q_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_f32_x2() {
-        let a: [f32; 5] = [0., 1., 2., 3., 4.];
-        let e: [f32; 4] = [1., 2., 3., 4.];
-        let mut r: [f32; 4] = [0f32; 4];
-        vst1_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_f32_x2() {
-        let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
-        let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
-        let mut r: [f32; 8] = [0f32; 8];
-        vst1q_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_f32_x3() {
-        let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.];
-        let e: [f32; 6] = [1., 2., 3., 4., 5., 6.];
-        let mut r: [f32; 6] = [0f32; 6];
-        vst1_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_f32_x3() {
-        let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.];
-        let e: [f32; 12] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.];
-        let mut r: [f32; 12] = [0f32; 12];
-        vst1q_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_f32_x4() {
-        let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
-        let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
-        let mut r: [f32; 8] = [0f32; 8];
-        vst1_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_f32_x4() {
-        let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.];
-        let e: [f32; 16] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.];
-        let mut r: [f32; 16] = [0f32; 16];
-        vst1q_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
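The `_x2`/`_x3`/`_x4` store tests above check plain concatenation: the N source vectors land back-to-back in the destination with no interleaving, so the expected arrays are simply the input sequence shifted by one. A scalar model for the x2 case (hypothetical helper):

```rust
/// Scalar model of vst1_*_x2: store two vectors contiguously.
fn vst1_x2_model<T: Copy, const N: usize>(dst: &mut [T], v: [[T; N]; 2]) {
    dst[..N].copy_from_slice(&v[0]);
    dst[N..2 * N].copy_from_slice(&v[1]);
}

fn main() {
    // Mirrors test_vst1q_s32_x2: [1, 2, 3, 4] then [5, 6, 7, 8].
    let v = [[1i32, 2, 3, 4], [5, 6, 7, 8]];
    let mut r = [0i32; 8];
    vst1_x2_model(&mut r, v);
    assert_eq!(r, [1, 2, 3, 4, 5, 6, 7, 8]);
}
```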
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_s8() {
-        let a: [i8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [i8; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9];
-        let mut r: [i8; 16] = [0i8; 16];
-        vst2_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_s16() {
-        let a: [i16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [i16; 8] = [1, 2, 2, 3, 2, 4, 3, 5];
-        let mut r: [i16; 8] = [0i16; 8];
-        vst2_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_s32() {
-        let a: [i32; 5] = [0, 1, 2, 2, 3];
-        let e: [i32; 4] = [1, 2, 2, 3];
-        let mut r: [i32; 4] = [0i32; 4];
-        vst2_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_s8() {
-        let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
-        let e: [i8; 32] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17];
-        let mut r: [i8; 32] = [0i8; 32];
-        vst2q_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_s16() {
-        let a: [i16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [i16; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9];
-        let mut r: [i16; 16] = [0i16; 16];
-        vst2q_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_s32() {
-        let a: [i32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [i32; 8] = [1, 2, 2, 3, 2, 4, 3, 5];
-        let mut r: [i32; 8] = [0i32; 8];
-        vst2q_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_s64() {
-        let a: [i64; 3] = [0, 1, 2];
-        let e: [i64; 2] = [1, 2];
-        let mut r: [i64; 2] = [0i64; 2];
-        vst2_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_u8() {
-        let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u8; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst2_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_u16() {
-        let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [u16; 8] = [1, 2, 2, 3, 2, 4, 3, 5];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst2_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_u32() {
-        let a: [u32; 5] = [0, 1, 2, 2, 3];
-        let e: [u32; 4] = [1, 2, 2, 3];
-        let mut r: [u32; 4] = [0u32; 4];
-        vst2_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_u8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
-        let e: [u8; 32] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst2q_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_u16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u16; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst2q_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_u32() {
-        let a: [u32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [u32; 8] = [1, 2, 2, 3, 2, 4, 3, 5];
-        let mut r: [u32; 8] = [0u32; 8];
-        vst2q_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_p8() {
-        let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u8; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst2_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_p16() {
-        let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [u16; 8] = [1, 2, 2, 3, 2, 4, 3, 5];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst2_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_p8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17];
-        let e: [u8; 32] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst2q_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_p16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u16; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst2q_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_u64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 2];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst2_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_p64() {
-        let a: [u64; 3] = [0, 1, 2];
-        let e: [u64; 2] = [1, 2];
-        let mut r: [u64; 2] = [0u64; 2];
-        vst2_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_f32() {
-        let a: [f32; 5] = [0., 1., 2., 2., 3.];
-        let e: [f32; 4] = [1., 2., 2., 3.];
-        let mut r: [f32; 4] = [0f32; 4];
-        vst2_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_f32() {
-        let a: [f32; 9] = [0., 1., 2., 2., 3., 2., 3., 4., 5.];
-        let e: [f32; 8] = [1., 2., 2., 3., 2., 4., 3., 5.];
-        let mut r: [f32; 8] = [0f32; 8];
-        vst2q_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
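Unlike the `_xN` stores, the `vst2` tests above expect a 2-way interleave: element i of the first vector goes to index 2i and element i of the second to 2i + 1. A scalar model (illustrative, not the stdarch implementation):

```rust
/// Scalar model of vst2: interleave two vectors into `dst`.
fn vst2_model<T: Copy, const N: usize>(dst: &mut [T], v: [[T; N]; 2]) {
    for i in 0..N {
        dst[2 * i] = v[0][i];
        dst[2 * i + 1] = v[1][i];
    }
}

fn main() {
    // Mirrors test_vst2_s32: [1, 2] and [2, 3] interleave to [1, 2, 2, 3].
    let mut r = [0i32; 4];
    vst2_model(&mut r, [[1, 2], [2, 3]]);
    assert_eq!(r, [1, 2, 2, 3]);
}
```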
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_s8() {
-        let a: [i8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [i8; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i8; 16] = [0i8; 16];
-        vst2_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_s16() {
-        let a: [i16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [i16; 8] = [1, 2, 0, 0, 0, 0, 0, 0];
-        let mut r: [i16; 8] = [0i16; 8];
-        vst2_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_s32() {
-        let a: [i32; 5] = [0, 1, 2, 2, 3];
-        let e: [i32; 4] = [1, 2, 0, 0];
-        let mut r: [i32; 4] = [0i32; 4];
-        vst2_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_s16() {
-        let a: [i16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [i16; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i16; 16] = [0i16; 16];
-        vst2q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_s32() {
-        let a: [i32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [i32; 8] = [1, 2, 0, 0, 0, 0, 0, 0];
-        let mut r: [i32; 8] = [0i32; 8];
-        vst2q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_u8() {
-        let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u8; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst2_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_u16() {
-        let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [u16; 8] = [1, 2, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst2_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_u32() {
-        let a: [u32; 5] = [0, 1, 2, 2, 3];
-        let e: [u32; 4] = [1, 2, 0, 0];
-        let mut r: [u32; 4] = [0u32; 4];
-        vst2_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_u16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u16; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst2q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_u32() {
-        let a: [u32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [u32; 8] = [1, 2, 0, 0, 0, 0, 0, 0];
-        let mut r: [u32; 8] = [0u32; 8];
-        vst2q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_p8() {
-        let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u8; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 16] = [0u8; 16];
-        vst2_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_p16() {
-        let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5];
-        let e: [u16; 8] = [1, 2, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 8] = [0u16; 8];
-        vst2_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_p16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9];
-        let e: [u16; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst2q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2_lane_f32() {
-        let a: [f32; 5] = [0., 1., 2., 2., 3.];
-        let e: [f32; 4] = [1., 2., 0., 0.];
-        let mut r: [f32; 4] = [0f32; 4];
-        vst2_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst2q_lane_f32() {
-        let a: [f32; 9] = [0., 1., 2., 2., 3., 2., 3., 4., 5.];
-        let e: [f32; 8] = [1., 2., 0., 0., 0., 0., 0., 0.];
-        let mut r: [f32; 8] = [0f32; 8];
-        vst2q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
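The `vst2_lane` tests above combine the two ideas: only lane `LANE` of each constituent vector is stored, so exactly two elements are written and the rest of the buffer stays zeroed, which is what the expected arrays encode. A scalar sketch (hypothetical helper):

```rust
/// Scalar model of vst2_lane::<LANE>: store lane LANE of each vector.
fn vst2_lane_model<T: Copy, const N: usize, const LANE: usize>(dst: &mut [T], v: [[T; N]; 2]) {
    dst[0] = v[0][LANE];
    dst[1] = v[1][LANE];
}

fn main() {
    // Mirrors test_vst2_lane_s32: only [1, 2] is written, the tail stays 0.
    let mut r = [0i32; 4];
    vst2_lane_model::<i32, 2, 0>(&mut r, [[1, 2], [2, 3]]);
    assert_eq!(r, [1, 2, 0, 0]);
}
```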
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_s8() {
-        let a: [i8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [i8; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16];
-        let mut r: [i8; 24] = [0i8; 24];
-        vst3_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_s16() {
-        let a: [i16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [i16; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8];
-        let mut r: [i16; 12] = [0i16; 12];
-        vst3_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_s32() {
-        let a: [i32; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [i32; 6] = [1, 2, 2, 2, 4, 4];
-        let mut r: [i32; 6] = [0i32; 6];
-        vst3_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_s8() {
-        let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
-        let e: [i8; 48] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48];
-        let mut r: [i8; 48] = [0i8; 48];
-        vst3q_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_s16() {
-        let a: [i16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [i16; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16];
-        let mut r: [i16; 24] = [0i16; 24];
-        vst3q_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_s32() {
-        let a: [i32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [i32; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8];
-        let mut r: [i32; 12] = [0i32; 12];
-        vst3q_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_s64() {
-        let a: [i64; 4] = [0, 1, 2, 2];
-        let e: [i64; 3] = [1, 2, 2];
-        let mut r: [i64; 3] = [0i64; 3];
-        vst3_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_u8() {
-        let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u8; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16];
-        let mut r: [u8; 24] = [0u8; 24];
-        vst3_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_u16() {
-        let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [u16; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8];
-        let mut r: [u16; 12] = [0u16; 12];
-        vst3_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_u32() {
-        let a: [u32; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [u32; 6] = [1, 2, 2, 2, 4, 4];
-        let mut r: [u32; 6] = [0u32; 6];
-        vst3_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_u8() {
-        let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
-        let e: [u8; 48] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48];
-        let mut r: [u8; 48] = [0u8; 48];
-        vst3q_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_u16() {
-        let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u16; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16];
-        let mut r: [u16; 24] = [0u16; 24];
-        vst3q_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_u32() {
-        let a: [u32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [u32; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8];
-        let mut r: [u32; 12] = [0u32; 12];
-        vst3q_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_p8() {
-        let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u8; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16];
-        let mut r: [u8; 24] = [0u8; 24];
-        vst3_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_p16() {
-        let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [u16; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8];
-        let mut r: [u16; 12] = [0u16; 12];
-        vst3_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_p8() {
-        let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48];
-        let e: [u8; 48] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48];
-        let mut r: [u8; 48] = [0u8; 48];
-        vst3q_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_p16() {
-        let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u16; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16];
-        let mut r: [u16; 24] = [0u16; 24];
-        vst3q_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_u64() {
-        let a: [u64; 4] = [0, 1, 2, 2];
-        let e: [u64; 3] = [1, 2, 2];
-        let mut r: [u64; 3] = [0u64; 3];
-        vst3_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_p64() {
-        let a: [u64; 4] = [0, 1, 2, 2];
-        let e: [u64; 3] = [1, 2, 2];
-        let mut r: [u64; 3] = [0u64; 3];
-        vst3_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_f32() {
-        let a: [f32; 7] = [0., 1., 2., 2., 4., 2., 4.];
-        let e: [f32; 6] = [1., 2., 2., 2., 4., 4.];
-        let mut r: [f32; 6] = [0f32; 6];
-        vst3_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_f32() {
-        let a: [f32; 13] = [0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.];
-        let e: [f32; 12] = [1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.];
-        let mut r: [f32; 12] = [0f32; 12];
-        vst3q_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
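The `vst3` family generalizes the interleave to three source vectors: element i of vector j lands at index 3i + j. In scalar form (illustrative helper, not the stdarch implementation):

```rust
/// Scalar model of vst3: 3-way interleave into `dst`.
fn vst3_model<T: Copy, const N: usize>(dst: &mut [T], v: [[T; N]; 3]) {
    for i in 0..N {
        dst[3 * i] = v[0][i];
        dst[3 * i + 1] = v[1][i];
        dst[3 * i + 2] = v[2][i];
    }
}

fn main() {
    // Mirrors test_vst3_s32: [1, 2] / [2, 4] / [2, 4] interleave
    // to [1, 2, 2, 2, 4, 4].
    let mut r = [0i32; 6];
    vst3_model(&mut r, [[1, 2], [2, 4], [2, 4]]);
    assert_eq!(r, [1, 2, 2, 2, 4, 4]);
}
```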
-        let a: [i16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [i16; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i16; 24] = [0i16; 24];
-        vst3q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_s32() {
-        let a: [i32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [i32; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i32; 12] = [0i32; 12];
-        vst3q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_u8() {
-        let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u8; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 24] = [0u8; 24];
-        vst3_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_u16() {
-        let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [u16; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 12] = [0u16; 12];
-        vst3_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_u32() {
-        let a: [u32; 7] = [0, 1, 2, 2, 4, 2, 4];
-        let e: [u32; 6] = [1, 2, 2, 0, 0, 0];
-        let mut r: [u32; 6] = [0u32; 6];
-        vst3_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_u16() {
-        let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u16; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 24] = [0u16; 24];
-        vst3q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_u32() {
-        let a: [u32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [u32; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u32; 12] = [0u32; 12];
-        vst3q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_p8() {
-        let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u8; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 24] = [0u8; 24];
-        vst3_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_p16() {
-        let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8];
-        let e: [u16; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 12] = [0u16; 12];
-        vst3_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_p16() {
-        let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16];
-        let e: [u16; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 24] = [0u16; 24];
-        vst3q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3_lane_f32() {
-        let a: [f32; 7] = [0., 1., 2., 2., 3., 2., 3.];
-        let e: [f32; 6] = [1., 2., 2., 0., 0., 0.];
-        let mut r: [f32; 6] = [0f32; 6];
-        vst3_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst3q_lane_f32() {
-        let a: [f32; 13] = [0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5.];
-        let e: [f32; 12] = [1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0.];
-        let mut r: [f32; 12] = [0f32; 12];
-        vst3q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
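// The `e` arrays in the store tests above follow from ST3's 3-way
// interleave: lane i of each of the three source vectors lands at
// r[3*i], r[3*i + 1] and r[3*i + 2]. A minimal scalar model of that
// layout (`st3_model` is a hypothetical helper for illustration, not
// part of the generated code):
fn st3_model<T: Copy>(v: [&[T]; 3], out: &mut [T]) {
    // Write lane i of vectors 0..3 to out[3*i], out[3*i+1], out[3*i+2].
    for i in 0..v[0].len() {
        for (j, vec) in v.iter().enumerate() {
            out[3 * i + j] = vec[i];
        }
    }
}

#[test]
fn st3_model_matches_vst3_s64_expectation() {
    // Same data as `test_vst3_s64`: the three one-lane vectors loaded
    // from a[1..] are [1], [2], [2]; interleaving yields [1, 2, 2].
    let mut r = [0i64; 3];
    st3_model([&[1], &[2], &[2]], &mut r);
    assert_eq!(r, [1, 2, 2]);
}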
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_s8() {
-        let a: [i8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [i8; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let mut r: [i8; 32] = [0i8; 32];
-        vst4_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_s16() {
-        let a: [i16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [i16; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let mut r: [i16; 16] = [0i16; 16];
-        vst4_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_s32() {
-        let a: [i32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [i32; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
-        let mut r: [i32; 8] = [0i32; 8];
-        vst4_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_s8() {
-        let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let e: [i8; 64] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let mut r: [i8; 64] = [0i8; 64];
-        vst4q_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_s16() {
-        let a: [i16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [i16; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let mut r: [i16; 32] = [0i16; 32];
-        vst4q_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_s32() {
-        let a: [i32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [i32; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let mut r: [i32; 16] = [0i32; 16];
-        vst4q_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_s64() {
-        let a: [i64; 5] = [0, 1, 2, 2, 6];
-        let e: [i64; 4] = [1, 2, 2, 6];
-        let mut r: [i64; 4] = [0i64; 4];
-        vst4_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_u8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u8; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst4_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_u16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [u16; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst4_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_u32() {
-        let a: [u32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [u32; 8] = [1, 2, 2, 6, 2, 6, 6, 8];
-        let mut r: [u32; 8] = [0u32; 8];
-        vst4_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_u8() {
-        let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let e: [u8; 64] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let mut r: [u8; 64] = [0u8; 64];
-        vst4q_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_u16() {
-        let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u16; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let mut r: [u16; 32] = [0u16; 32];
-        vst4q_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_u32() {
-        let a: [u32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [u32; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let mut r: [u32; 16] = [0u32; 16];
-        vst4q_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_p8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u8; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst4_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_p16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [u16; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst4_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_p8() {
-        let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let e: [u8; 64] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64];
-        let mut r: [u8; 64] = [0u8; 64];
-        vst4q_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_p16() {
-        let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u16; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let mut r: [u16; 32] = [0u16; 32];
-        vst4q_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_u64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 6];
-        let e: [u64; 4] = [1, 2, 2, 6];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst4_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_p64() {
-        let a: [u64; 5] = [0, 1, 2, 2, 6];
-        let e: [u64; 4] = [1, 2, 2, 6];
-        let mut r: [u64; 4] = [0u64; 4];
-        vst4_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_f32() {
-        let a: [f32; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
-        let e: [f32; 8] = [1., 2., 2., 6., 2., 6., 6., 8.];
-        let mut r: [f32; 8] = [0f32; 8];
-        vst4_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_f32() {
-        let a: [f32; 17] = [0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.];
-        let e: [f32; 16] = [1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.];
-        let mut r: [f32; 16] = [0f32; 16];
-        vst4q_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_s8() {
-        let a: [i8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [i8; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i8; 32] = [0i8; 32];
-        vst4_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_s16() {
-        let a: [i16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [i16; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i16; 16] = [0i16; 16];
-        vst4_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_s32() {
-        let a: [i32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [i32; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
-        let mut r: [i32; 8] = [0i32; 8];
-        vst4_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_s16() {
-        let a: [i16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [i16; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i16; 32] = [0i16; 32];
-        vst4q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_s32() {
-        let a: [i32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [i32; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [i32; 16] = [0i32; 16];
-        vst4q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_u8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u8; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst4_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_u16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [u16; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst4_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_u32() {
-        let a: [u32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8];
-        let e: [u32; 8] = [1, 2, 2, 6, 0, 0, 0, 0];
-        let mut r: [u32; 8] = [0u32; 8];
-        vst4_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_u16() {
-        let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u16; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 32] = [0u16; 32];
-        vst4q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_u32() {
-        let a: [u32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [u32; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u32; 16] = [0u32; 16];
-        vst4q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_p8() {
-        let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u8; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u8; 32] = [0u8; 32];
-        vst4_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_p16() {
-        let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16];
-        let e: [u16; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 16] = [0u16; 16];
-        vst4_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_p16() {
-        let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32];
-        let e: [u16; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
-        let mut r: [u16; 32] = [0u16; 32];
-        vst4q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4_lane_f32() {
-        let a: [f32; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.];
-        let e: [f32; 8] = [1., 2., 2., 6., 0., 0., 0., 0.];
-        let mut r: [f32; 8] = [0f32; 8];
-        vst4_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vst4q_lane_f32() {
-        let a: [f32; 17] = [0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.];
-        let e: [f32; 16] = [1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.];
-        let mut r: [f32; 16] = [0f32; 16];
-        vst4q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vusdot_s32() {
-        let a: i32x2 = i32x2::new(1000, -4200);
-        let b: u8x8 = u8x8::new(100, 205, 110, 195, 120, 185, 130, 175);
-        let c: i8x8 = i8x8::new(0, 1, 2, 3, -1, -2, -3, -4);
-        let e: i32x2 = i32x2::new(2010, -5780);
-        let r: i32x2 = transmute(vusdot_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vusdotq_s32() {
-        let a: i32x4 = i32x4::new(1000, -4200, -1000, 2000);
-        let b: u8x16 = u8x16::new(100, 205, 110, 195, 120, 185, 130, 175, 140, 165, 150, 155, 160, 145, 170, 135);
-        let c: i8x16 = i8x16::new(0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8);
-        let e: i32x4 = i32x4::new(2010, -5780, 2370, -1940);
-        let r: i32x4 = transmute(vusdotq_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vusdot_lane_s32() {
-        let a: i32x2 = i32x2::new(1000, -4200);
-        let b: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170);
-        let c: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3);
-        let e: i32x2 = i32x2::new(2100, -2700);
-        let r: i32x2 = transmute(vusdot_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-
-        let a: i32x2 = i32x2::new(1000, -4200);
-        let b: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170);
-        let c: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3);
-        let e: i32x2 = i32x2::new(260, -5180);
-        let r: i32x2 = transmute(vusdot_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vusdotq_lane_s32() {
-        let a: i32x4 = i32x4::new(1000, -4200, -1000, 2000);
-        let b: u8x16 = u8x16::new(100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250);
-        let c: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3);
-        let e: i32x4 = i32x4::new(2100, -2700, 900, 4300);
-        let r: i32x4 = transmute(vusdotq_lane_s32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-
-        let a: i32x4 = i32x4::new(1000, -4200, -1000, 2000);
-        let b: u8x16 = u8x16::new(100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250);
-        let c: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3);
-        let e: i32x4 = i32x4::new(260, -5180, -2220, 540);
-        let r: i32x4 = transmute(vusdotq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
= "neon,i8mm")] - unsafe fn test_vsudot_lane_s32() { - let a: i32x2 = i32x2::new(-2000, 4200); - let b: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3); - let c: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170); - let e: i32x2 = i32x2::new(-900, 3460); - let r: i32x2 = transmute(vsudot_lane_s32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - - let a: i32x2 = i32x2::new(-2000, 4200); - let b: i8x8 = i8x8::new(4, 3, 2, 1, 0, -1, -2, -3); - let c: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170); - let e: i32x2 = i32x2::new(-500, 3220); - let r: i32x2 = transmute(vsudot_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,i8mm")] - unsafe fn test_vsudotq_lane_s32() { - let a: i32x4 = i32x4::new(-2000, 4200, -1000, 2000); - let b: i8x16 = i8x16::new(4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11); - let c: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170); - let e: i32x4 = i32x4::new(-900, 3460, -3580, -2420); - let r: i32x4 = transmute(vsudotq_lane_s32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - - let a: i32x4 = i32x4::new(-2000, 4200, -1000, 2000); - let b: i8x16 = i8x16::new(4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11); - let c: u8x8 = u8x8::new(100, 110, 120, 130, 140, 150, 160, 170); - let e: i32x4 = i32x4::new(-500, 3220, -4460, -3940); - let r: i32x4 = transmute(vsudotq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_s8() { - let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 4, 3, 8, 5, 12, 7, 16); - let r: i8x8 = transmute(vmul_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_s8() { - let a: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32); - let r: i8x16 = transmute(vmulq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_s16() { - let a: i16x4 = i16x4::new(1, 2, 1, 2); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(1, 4, 3, 8); - let r: i16x4 = transmute(vmul_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_s16() { - let a: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16); - let r: i16x8 = transmute(vmulq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(1, 4); - let r: i32x2 = transmute(vmul_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmulq_s32() { - let a: i32x4 = i32x4::new(1, 2, 1, 2); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(1, 4, 3, 8); - let r: i32x4 = transmute(vmulq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmul_u8() { - let a: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = 
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: i8x8 = i8x8::new(1, 4, 3, 8, 5, 12, 7, 16);
-        let r: i8x8 = transmute(vmul_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: i8x16 = i8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32);
-        let r: i8x16 = transmute(vmulq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 1, 2);
-        let b: i16x4 = i16x4::new(1, 2, 3, 4);
-        let e: i16x4 = i16x4::new(1, 4, 3, 8);
-        let r: i16x4 = transmute(vmul_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16);
-        let r: i16x8 = transmute(vmulq_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(1, 2);
-        let e: i32x2 = i32x2::new(1, 4);
-        let r: i32x2 = transmute(vmul_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 1, 2);
-        let b: i32x4 = i32x4::new(1, 2, 3, 4);
-        let e: i32x4 = i32x4::new(1, 4, 3, 8);
-        let r: i32x4 = transmute(vmulq_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u8x8 = u8x8::new(1, 4, 3, 8, 5, 12, 7, 16);
-        let r: u8x8 = transmute(vmul_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x16 = u8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32);
-        let r: u8x16 = transmute(vmulq_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 1, 2);
-        let b: u16x4 = u16x4::new(1, 2, 3, 4);
-        let e: u16x4 = u16x4::new(1, 4, 3, 8);
-        let r: u16x4 = transmute(vmul_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u16x8 = u16x8::new(1, 4, 3, 8, 5, 12, 7, 16);
-        let r: u16x8 = transmute(vmulq_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(1, 4);
-        let r: u32x2 = transmute(vmul_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 1, 2);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(1, 4, 3, 8);
-        let r: u32x4 = transmute(vmulq_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_p8() {
-        let a: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3);
-        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: i8x8 = i8x8::new(1, 6, 3, 12, 5, 10, 7, 24);
-        let r: i8x8 = transmute(vmul_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_p8() {
-        let a: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3);
-        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: i8x16 = i8x16::new(1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48);
-        let r: i8x16 = transmute(vmulq_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
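// `vmul_p8` is a polynomial (carryless) multiply over GF(2): partial
// products are combined with XOR instead of addition and only the low
// 8 bits are kept, which is why 3 * 6 yields 10 rather than 18 in the
// expected vectors above. A scalar sketch (`pmul8_model` is a
// hypothetical helper, not part of the generated code):
fn pmul8_model(a: u8, b: u8) -> u8 {
    let mut r: u16 = 0;
    for i in 0..8 {
        if (a >> i) & 1 != 0 {
            // XOR in the shifted multiplicand instead of adding it.
            r ^= (b as u16) << i;
        }
    }
    r as u8 // the non-widening form truncates to the low byte
}

#[test]
fn pmul8_model_matches_test_vmul_p8() {
    // Lane pairs taken from `test_vmul_p8` above.
    assert_eq!(pmul8_model(3, 2), 6);
    assert_eq!(pmul8_model(3, 6), 10);
    assert_eq!(pmul8_model(3, 8), 24);
}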
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_f32() {
-        let a: f32x2 = f32x2::new(1.0, 2.0);
-        let b: f32x2 = f32x2::new(2.0, 3.0);
-        let e: f32x2 = f32x2::new(2.0, 6.0);
-        let r: f32x2 = transmute(vmul_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_f32() {
-        let a: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0);
-        let b: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
-        let e: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0);
-        let r: f32x4 = transmute(vmulq_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_n_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16 = 2;
-        let e: i16x4 = i16x4::new(2, 4, 6, 8);
-        let r: i16x4 = transmute(vmul_n_s16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_n_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16 = 2;
-        let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
-        let r: i16x8 = transmute(vmulq_n_s16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_n_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32 = 2;
-        let e: i32x2 = i32x2::new(2, 4);
-        let r: i32x2 = transmute(vmul_n_s32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_n_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32 = 2;
-        let e: i32x4 = i32x4::new(2, 4, 6, 8);
-        let r: i32x4 = transmute(vmulq_n_s32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_n_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16 = 2;
-        let e: u16x4 = u16x4::new(2, 4, 6, 8);
-        let r: u16x4 = transmute(vmul_n_u16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_n_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16 = 2;
-        let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
-        let r: u16x8 = transmute(vmulq_n_u16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_n_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32 = 2;
-        let e: u32x2 = u32x2::new(2, 4);
-        let r: u32x2 = transmute(vmul_n_u32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_n_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32 = 2;
-        let e: u32x4 = u32x4::new(2, 4, 6, 8);
-        let r: u32x4 = transmute(vmulq_n_u32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_n_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32 = 2.;
-        let e: f32x2 = f32x2::new(2., 4.);
-        let r: f32x2 = transmute(vmul_n_f32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_n_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32 = 2.;
-        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
-        let r: f32x4 = transmute(vmulq_n_f32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_lane_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i16x4 = i16x4::new(2, 4, 6, 8);
-        let r: i16x4 = transmute(vmul_lane_s16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_laneq_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i16x4 = i16x4::new(2, 4, 6, 8);
-        let r: i16x4 = transmute(vmul_laneq_s16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_lane_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
-        let r: i16x8 = transmute(vmulq_lane_s16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_laneq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
-        let r: i16x8 = transmute(vmulq_laneq_s16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_lane_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(0, 2);
-        let e: i32x2 = i32x2::new(2, 4);
-        let r: i32x2 = transmute(vmul_lane_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_laneq_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i32x2 = i32x2::new(2, 4);
-        let r: i32x2 = transmute(vmul_laneq_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_lane_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32x2 = i32x2::new(0, 2);
-        let e: i32x4 = i32x4::new(2, 4, 6, 8);
-        let r: i32x4 = transmute(vmulq_lane_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_laneq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(2, 4, 6, 8);
-        let r: i32x4 = transmute(vmulq_laneq_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_lane_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(0, 2, 0, 0);
-        let e: u16x4 = u16x4::new(2, 4, 6, 8);
-        let r: u16x4 = transmute(vmul_lane_u16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_laneq_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: u16x4 = u16x4::new(2, 4, 6, 8);
-        let r: u16x4 = transmute(vmul_laneq_u16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_lane_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x4 = u16x4::new(0, 2, 0, 0);
-        let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
-        let r: u16x8 = transmute(vmulq_lane_u16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_laneq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
-        let r: u16x8 = transmute(vmulq_laneq_u16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_lane_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(0, 2);
-        let e: u32x2 = u32x2::new(2, 4);
-        let r: u32x2 = transmute(vmul_lane_u32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_laneq_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x4 = u32x4::new(0, 2, 0, 0);
-        let e: u32x2 = u32x2::new(2, 4);
-        let r: u32x2 = transmute(vmul_laneq_u32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_lane_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x2 = u32x2::new(0, 2);
-        let e: u32x4 = u32x4::new(2, 4, 6, 8);
-        let r: u32x4 = transmute(vmulq_lane_u32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_laneq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(0, 2, 0, 0);
-        let e: u32x4 = u32x4::new(2, 4, 6, 8);
-        let r: u32x4 = transmute(vmulq_laneq_u32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_lane_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x2 = f32x2::new(2., 0.);
-        let e: f32x2 = f32x2::new(2., 4.);
-        let r: f32x2 = transmute(vmul_lane_f32::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_laneq_f32() {
-        let a: f32x2 = f32x2::new(1., 2.);
-        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x2 = f32x2::new(2., 4.);
-        let r: f32x2 = transmute(vmul_laneq_f32::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_lane_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x2 = f32x2::new(2., 0.);
-        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
-        let r: f32x4 = transmute(vmulq_lane_f32::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_laneq_f32() {
-        let a: f32x4 = f32x4::new(1., 2., 3., 4.);
-        let b: f32x4 = f32x4::new(2., 0., 0., 0.);
-        let e: f32x4 = f32x4::new(2., 4., 6., 8.);
-        let r: f32x4 = transmute(vmulq_laneq_f32::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16);
-        let r: i16x8 = transmute(vmull_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(1, 2, 1, 2);
-        let e: i32x4 = i32x4::new(1, 4, 3, 8);
-        let r: i32x4 = transmute(vmull_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(1, 2);
-        let e: i64x2 = i64x2::new(1, 4);
-        let r: i64x2 = transmute(vmull_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u16x8 = u16x8::new(1, 4, 3, 8, 5, 12, 7, 16);
-        let r: u16x8 = transmute(vmull_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(1, 2, 1, 2);
-        let e: u32x4 = u32x4::new(1, 4, 3, 8);
-        let r: u32x4 = transmute(vmull_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(1, 2);
-        let e: u64x2 = u64x2::new(1, 4);
-        let r: u64x2 = transmute(vmull_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_p8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3);
-        let e: i16x8 = i16x8::new(1, 6, 3, 12, 5, 10, 7, 24);
-        let r: i16x8 = transmute(vmull_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_n_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16 = 2;
-        let e: i32x4 = i32x4::new(2, 4, 6, 8);
-        let r: i32x4 = transmute(vmull_n_s16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_n_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32 = 2;
-        let e: i64x2 = i64x2::new(2, 4);
-        let r: i64x2 = transmute(vmull_n_s32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_n_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16 = 2;
-        let e: u32x4 = u32x4::new(2, 4, 6, 8);
-        let r: u32x4 = transmute(vmull_n_u16(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_n_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32 = 2;
-        let e: u64x2 = u64x2::new(2, 4);
-        let r: u64x2 = transmute(vmull_n_u32(transmute(a), b));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_lane_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(0, 2, 0, 0);
-        let e: i32x4 = i32x4::new(2, 4, 6, 8);
-        let r: i32x4 = transmute(vmull_lane_s16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_laneq_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: i32x4 = i32x4::new(2, 4, 6, 8);
-        let r: i32x4 = transmute(vmull_laneq_s16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_lane_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(0, 2);
-        let e: i64x2 = i64x2::new(2, 4);
-        let r: i64x2 = transmute(vmull_lane_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_laneq_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x4 = i32x4::new(0, 2, 0, 0);
-        let e: i64x2 = i64x2::new(2, 4);
-        let r: i64x2 = transmute(vmull_laneq_s32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_lane_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(0, 2, 0, 0);
-        let e: u32x4 = u32x4::new(2, 4, 6, 8);
-        let r: u32x4 = transmute(vmull_lane_u16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_laneq_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
-        let e: u32x4 = u32x4::new(2, 4, 6, 8);
-        let r: u32x4 = transmute(vmull_laneq_u16::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_lane_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(0, 2);
-        let e: u64x2 = u64x2::new(2, 4);
-        let r: u64x2 = transmute(vmull_lane_u32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmull_laneq_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x4 = u32x4::new(0, 2, 0, 0);
-        let e: u64x2 = u64x2::new(2, 4);
-        let r: u64x2 = transmute(vmull_laneq_u32::<1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfma_f32() {
-        let a: f32x2 = f32x2::new(8.0, 18.0);
-        let b: f32x2 = f32x2::new(6.0, 4.0);
-        let c: f32x2 = f32x2::new(2.0, 3.0);
-        let e: f32x2 = f32x2::new(20.0, 30.0);
-        let r: f32x2 = transmute(vfma_f32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfmaq_f32() {
-        let a: f32x4 = f32x4::new(8.0, 18.0, 12.0, 10.0);
-        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
-        let c: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
-        let e: f32x4 = f32x4::new(20.0, 30.0, 40.0, 50.0);
-        let r: f32x4 = transmute(vfmaq_f32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfma_n_f32() {
-        let a: f32x2 = f32x2::new(2.0, 3.0);
-        let b: f32x2 = f32x2::new(6.0, 4.0);
-        let c: f32 = 8.0;
-        let e: f32x2 = f32x2::new(50.0, 35.0);
-        let r: f32x2 = transmute(vfma_n_f32(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfmaq_n_f32() {
-        let a: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
-        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
-        let c: f32 = 8.0;
-        let e: f32x4 = f32x4::new(50.0, 35.0, 60.0, 69.0);
-        let r: f32x4 = transmute(vfmaq_n_f32(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfms_f32() {
-        let a: f32x2 = f32x2::new(20.0, 30.0);
-        let b: f32x2 = f32x2::new(6.0, 4.0);
-        let c: f32x2 = f32x2::new(2.0, 3.0);
-        let e: f32x2 = f32x2::new(8.0, 18.0);
-        let r: f32x2 = transmute(vfms_f32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfmsq_f32() {
-        let a: f32x4 = f32x4::new(20.0, 30.0, 40.0, 50.0);
-        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
-        let c: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
-        let e: f32x4 = f32x4::new(8.0, 18.0, 12.0, 10.0);
-        let r: f32x4 = transmute(vfmsq_f32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfms_n_f32() {
-        let a: f32x2 = f32x2::new(50.0, 35.0);
-        let b: f32x2 = f32x2::new(6.0, 4.0);
-        let c: f32 = 8.0;
-        let e: f32x2 = f32x2::new(2.0, 3.0);
-        let r: f32x2 = transmute(vfms_n_f32(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
-
-    #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))]
-    #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))]
-    #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))]
-    unsafe fn test_vfmsq_n_f32() {
-        let a: f32x4 = f32x4::new(50.0, 35.0, 60.0, 69.0);
-        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
-        let c: f32 = 8.0;
-        let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
-        let r: f32x4 = transmute(vfmsq_n_f32(transmute(a), transmute(b), c));
-        assert_eq!(r, e);
-    }
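// Per lane, `vfma_f32` computes a + b * c and `vfms_f32` computes
// a - b * c, each with a single rounding step. `f32::mul_add` models
// that fused behaviour; this sketch (hypothetical helpers, not the
// generated implementation) reproduces the expectations above:
fn fma_model(a: f32, b: f32, c: f32) -> f32 {
    b.mul_add(c, a) // fused multiply-add: rounds once, like FMLA
}

fn fms_model(a: f32, b: f32, c: f32) -> f32 {
    (-b).mul_add(c, a) // FMLS negates the product, not the addend
}

#[test]
fn fma_model_matches_test_vfma_f32() {
    // Lane 0 of `test_vfma_f32` and `test_vfms_f32` respectively.
    assert_eq!(fma_model(8.0, 6.0, 2.0), 20.0);
    assert_eq!(fms_model(20.0, 6.0, 2.0), 8.0);
}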
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i8x8 = i8x8::new(0, 0, 2, 2, 4, 4, 6, 6);
-        let r: i8x8 = transmute(vsub_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i8x16 = i8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
-        let r: i8x16 = transmute(vsubq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(1, 2, 1, 2);
-        let e: i16x4 = i16x4::new(0, 0, 2, 2);
-        let r: i16x4 = transmute(vsub_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i16x8 = i16x8::new(0, 0, 2, 2, 4, 4, 6, 6);
-        let r: i16x8 = transmute(vsubq_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(1, 2);
-        let e: i32x2 = i32x2::new(0, 0);
-        let r: i32x2 = transmute(vsub_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32x4 = i32x4::new(1, 2, 1, 2);
-        let e: i32x4 = i32x4::new(0, 0, 2, 2);
-        let r: i32x4 = transmute(vsubq_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u8x8 = u8x8::new(0, 0, 2, 2, 4, 4, 6, 6);
-        let r: u8x8 = transmute(vsub_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u8x16 = u8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
-        let r: u8x16 = transmute(vsubq_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(1, 2, 1, 2);
-        let e: u16x4 = u16x4::new(0, 0, 2, 2);
-        let r: u16x4 = transmute(vsub_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u16x8 = u16x8::new(0, 0, 2, 2, 4, 4, 6, 6);
-        let r: u16x8 = transmute(vsubq_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vsub_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(1, 2, 1, 2);
-        let e: u32x4 = u32x4::new(0, 0, 2, 2);
-        let r: u32x4 = transmute(vsubq_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x1 = i64x1::new(1);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vsub_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x2 = i64x2::new(1, 2);
-        let e: i64x2 = i64x2::new(0, 0);
-        let r: i64x2 = transmute(vsubq_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u64() {
-        let a: u64x1 = u64x1::new(1);
-        let b: u64x1 = u64x1::new(1);
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vsub_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u64() {
-        let a: u64x2 = u64x2::new(1, 2);
-        let b: u64x2 = u64x2::new(1, 2);
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vsubq_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_f32() {
-        let a: f32x2 = f32x2::new(1.0, 4.0);
-        let b: f32x2 = f32x2::new(1.0, 2.0);
-        let e: f32x2 = f32x2::new(0.0, 2.0);
-        let r: f32x2 = transmute(vsub_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_f32() {
-        let a: f32x4 = f32x4::new(1.0, 4.0, 3.0, 8.0);
-        let b: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e: f32x4 = f32x4::new(0.0, 2.0, 0.0, 4.0);
-        let r: f32x4 = transmute(vsubq_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_p8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let e: i8x8 = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let r: i8x8 = transmute(vadd_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_p16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(1, 1, 1, 1);
-        let e: i16x4 = i16x4::new(0, 3, 2, 5);
-        let r: i16x4 = transmute(vadd_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_p8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let e: i8x16 = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17);
-        let r: i8x16 = transmute(vaddq_p8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_p16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let e: i16x8 = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let r: i16x8 = transmute(vaddq_p16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_p64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x1 = i64x1::new(1);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vadd_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_p64() {
-        let a: i64x2 = i64x2::new(1, 2);
-        let b: i64x2 = i64x2::new(1, 1);
-        let e: i64x2 = i64x2::new(0, 3);
-        let r: i64x2 = transmute(vaddq_p64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_p128() {
-        let a: p128 = 16;
-        let b: p128 = 1;
-        let e: p128 = 17;
-        let r: p128 = vaddq_p128(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_s16() {
-        let a: i16x8 = i16x8::new(0x7F_FF, -32768, 1, 1, 0x7F_FF, -32768, 1, 1);
-        let b: i16x8 = i16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
-        let e: i8x8 = i8x8::new(0x7F, -128, 0, 0, 0x7F, -128, 0, 0);
-        let r: i8x8 = transmute(vsubhn_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_s32() {
-        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 1, 1);
-        let b: i32x4 = i32x4::new(1, 0, 0, 0);
-        let e: i16x4 = i16x4::new(0x7F_FF, -32768, 0, 0);
-        let r: i16x4 = transmute(vsubhn_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_s64() {
-        let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
-        let b: i64x2 = i64x2::new(1, 0);
-        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
-        let r: i32x2 = transmute(vsubhn_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_u16() {
-        let a: u16x8 = u16x8::new(0xFF_FF, 0, 1, 1, 0xFF_FF, 0, 1, 1);
-        let b: u16x8 = u16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
-        let e: u8x8 = u8x8::new(0xFF, 0, 0, 0, 0xFF, 0, 0, 0);
-        let r: u8x8 = transmute(vsubhn_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_u32() {
-        let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 1, 1);
-        let b: u32x4 = u32x4::new(1, 0, 0, 0);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0, 0, 0);
-        let r: u16x4 = transmute(vsubhn_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_u64() {
-        let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
-        let b: u64x2 = u64x2::new(1, 0);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
-        let r: u32x2 = transmute(vsubhn_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
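// `vsubhn_*` subtracts and then narrows by keeping only the high half
// of each difference (an arithmetic shift by half the element width);
// the `_high` variants that follow append these narrowed results to an
// existing half-width vector. A scalar model for the s16 -> s8 case
// (`subhn_s16_model` is a hypothetical helper):
fn subhn_s16_model(a: i16, b: i16) -> i8 {
    (a.wrapping_sub(b) >> 8) as i8 // high byte of the 16-bit difference
}

#[test]
fn subhn_model_matches_test_vsubhn_s16() {
    // Lane pairs from `test_vsubhn_s16`.
    assert_eq!(subhn_s16_model(0x7F_FF, 1), 0x7F);
    assert_eq!(subhn_s16_model(i16::MIN, 0), -128);
    assert_eq!(subhn_s16_model(1, 0), 0);
}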
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_high_s16() {
-        let a: i8x8 = i8x8::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
-        let b: i16x8 = i16x8::new(0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1);
-        let c: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
-        let e: i8x16 = i8x16::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
-        let r: i8x16 = transmute(vsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_high_s32() {
-        let a: i16x4 = i16x4::new(0x7F_FF, 0, 0x7F_FF, 0);
-        let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 1, 0x7F_FF_FF_FF, 1);
-        let c: i32x4 = i32x4::new(1, 0, 1, 0);
-        let e: i16x8 = i16x8::new(0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0);
-        let r: i16x8 = transmute(vsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_high_s64() {
-        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0);
-        let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 1);
-        let c: i64x2 = i64x2::new(1, 0);
-        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, 0x7F_FF_FF_FF, 0);
-        let r: i32x4 = transmute(vsubhn_high_s64(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_high_u16() {
-        let a: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
-        let b: u16x8 = u16x8::new(0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1);
-        let c: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
-        let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
-        let r: u8x16 = transmute(vsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_high_u32() {
-        let a: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0);
-        let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 1, 0xFF_FF_FF_FF, 1);
-        let c: u32x4 = u32x4::new(1, 0, 1, 0);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0);
-        let r: u16x8 = transmute(vsubhn_high_u32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubhn_high_u64() {
-        let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
-        let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 1);
-        let c: u64x2 = u64x2::new(1, 0);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
-        let r: u32x4 = transmute(vsubhn_high_u64(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u8x8 = u8x8::new(0, 0, 1, 1, 2, 2, 3, 3);
-        let r: u8x8 = transmute(vhsub_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u8x16 = u8x16::new(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7);
-        let r: u8x16 = transmute(vhsubq_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(1, 2, 1, 2);
-        let e: u16x4 = u16x4::new(0, 0, 1, 1);
-        let r: u16x4 = transmute(vhsub_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: u16x8 = u16x8::new(0, 0, 1, 1, 2, 2, 3, 3);
-        let r: u16x8 = transmute(vhsubq_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vhsub_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(1, 2, 1, 2);
-        let e: u32x4 = u32x4::new(0, 0, 1, 1);
-        let r: u32x4 = transmute(vhsubq_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i8x8 = i8x8::new(0, 0, 1, 1, 2, 2, 3, 3);
-        let r: i8x8 = transmute(vhsub_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
-        let e: i8x16 = i8x16::new(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7);
-        let r: i8x16 = transmute(vhsubq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(1, 2, 1, 2);
-        let e: i16x4 = i16x4::new(0, 0, 1, 1);
-        let r: i16x4 = transmute(vhsub_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
i16x8::new(0, 0, 1, 1, 2, 2, 3, 3); - let r: i16x8 = transmute(vhsubq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhsub_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vhsub_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vhsubq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(1, 2, 1, 2); - let e: i32x4 = i32x4::new(0, 0, 1, 1); - let r: i32x4 = transmute(vhsubq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_s8() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vsubw_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i32x4 = i32x4::new(0, 0, 0, 0); - let r: i32x4 = transmute(vsubw_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_s32() { - let a: i64x2 = i64x2::new(0, 1); - let b: i32x2 = i32x2::new(0, 1); - let e: i64x2 = i64x2::new(0, 0); - let r: i64x2 = transmute(vsubw_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_u8() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vsubw_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vsubw_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubw_u32() { - let a: u64x2 = u64x2::new(0, 1); - let b: u32x2 = u32x2::new(0, 1); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vsubw_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_s8() { - let a: i8x8 = i8x8::new(0x7F, -128, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(0x7F, -128, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vsubl_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, -32768, 2, 3); - let b: i16x4 = i16x4::new(0x7F_FF, -32768, 2, 3); - let e: i32x4 = i32x4::new(0, 0, 0, 0); - let r: i32x4 = transmute(vsubl_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648); - let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648); - let e: i64x2 = i64x2::new(0, 0); - let r: i64x2 = transmute(vsubl_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_u8() { - let a: u8x8 = u8x8::new(0xFF, 0, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(0xFF, 0, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = 
transmute(vsubl_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_u16() { - let a: u16x4 = u16x4::new(0xFF_FF, 0, 2, 3); - let b: u16x4 = u16x4::new(0xFF_FF, 0, 2, 3); - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vsubl_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsubl_u32() { - let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vsubl_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdot_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i32x2 = i32x2::new(31, 176); - let r: i32x2 = transmute(vdot_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdotq_s32() { - let a: i32x4 = i32x4::new(1, 2, 1, 2); - let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let c: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let e: i32x4 = i32x4::new(31, 176, 31, 176); - let r: i32x4 = transmute(vdotq_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdot_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u32x2 = u32x2::new(31, 176); - let r: u32x2 = transmute(vdot_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdotq_u32() { - let a: u32x4 = u32x4::new(1, 2, 1, 2); - let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let c: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let e: u32x4 = u32x4::new(31, 176, 31, 176); - let r: u32x4 = transmute(vdotq_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdot_lane_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i8x8 = i8x8::new(-1, 2, 3, 4, 5, 6, 7, 8); - let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i32x2 = i32x2::new(29, 72); - let r: i32x2 = transmute(vdot_lane_s32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdotq_lane_s32() { - let a: i32x4 = i32x4::new(1, 2, 1, 2); - let b: i8x16 = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let c: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i32x4 = i32x4::new(29, 72, 31, 72); - let r: i32x4 = transmute(vdotq_lane_s32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdot_lane_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u8x8 = u8x8::new(255, 2, 3, 4, 5, 6, 7, 8); - let c: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u32x2 = u32x2::new(285, 72); - let r: u32x2 = transmute(vdot_lane_u32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,dotprod")] - unsafe fn test_vdotq_lane_u32() { - let a: u32x4 = u32x4::new(1, 2, 1, 2); - let b: u8x16 = u8x16::new(255, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let c: u8x8 = 
u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u32x4 = u32x4::new(285, 72, 31, 72); - let r: u32x4 = transmute(vdotq_lane_u32::<0>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let r: i8x8 = transmute(vmax_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vmaxq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(16, 15, 14, 13); - let e: i16x4 = i16x4::new(16, 15, 14, 13); - let r: i16x4 = transmute(vmax_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let r: i16x8 = transmute(vmaxq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(16, 15); - let e: i32x2 = i32x2::new(16, 15); - let r: i32x2 = transmute(vmax_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(16, 15, 14, 13); - let e: i32x4 = i32x4::new(16, 15, 14, 13); - let r: i32x4 = transmute(vmaxq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let r: u8x8 = transmute(vmax_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16); - let r: u8x16 = transmute(vmaxq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(16, 15, 14, 13); - let e: u16x4 = u16x4::new(16, 15, 14, 13); - let r: u16x4 = transmute(vmax_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let r: u16x8 = transmute(vmaxq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = 
u32x2::new(16, 15); - let e: u32x2 = u32x2::new(16, 15); - let r: u32x2 = transmute(vmax_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(16, 15, 14, 13); - let e: u32x4 = u32x4::new(16, 15, 14, 13); - let r: u32x4 = transmute(vmaxq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmax_f32() { - let a: f32x2 = f32x2::new(1.0, -2.0); - let b: f32x2 = f32x2::new(0.0, 3.0); - let e: f32x2 = f32x2::new(1.0, 3.0); - let r: f32x2 = transmute(vmax_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxq_f32() { - let a: f32x4 = f32x4::new(1.0, -2.0, 3.0, -4.0); - let b: f32x4 = f32x4::new(0.0, 3.0, 2.0, 8.0); - let e: f32x4 = f32x4::new(1.0, 3.0, 3.0, 8.0); - let r: f32x4 = transmute(vmaxq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))] - #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))] - #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))] - unsafe fn test_vmaxnm_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(8.0, 16.0); - let e: f32x2 = f32x2::new(8.0, 16.0); - let r: f32x2 = transmute(vmaxnm_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))] - #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))] - #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))] - unsafe fn test_vmaxnmq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); - let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); - let e: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0); - let r: f32x4 = transmute(vmaxnmq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vmin_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1); - let r: i8x16 = transmute(vminq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(16, 15, 14, 13); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vmin_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vminq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(16, 15); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vmin_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe 
fn test_vminq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(16, 15, 14, 13); - let e: i32x4 = i32x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vminq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vmin_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1); - let r: u8x16 = transmute(vminq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(16, 15, 14, 13); - let e: u16x4 = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vmin_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vminq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(16, 15); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vmin_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(16, 15, 14, 13); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vminq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmin_f32() { - let a: f32x2 = f32x2::new(1.0, -2.0); - let b: f32x2 = f32x2::new(0.0, 3.0); - let e: f32x2 = f32x2::new(0.0, -2.0); - let r: f32x2 = transmute(vmin_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminq_f32() { - let a: f32x4 = f32x4::new(1.0, -2.0, 3.0, -4.0); - let b: f32x4 = f32x4::new(0.0, 3.0, 2.0, 8.0); - let e: f32x4 = f32x4::new(0.0, -2.0, 2.0, -4.0); - let r: f32x4 = transmute(vminq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))] - #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))] - #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))] - unsafe fn test_vminnm_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(8.0, 16.0); - let e: f32x2 = f32x2::new(1.0, 2.0); - let r: f32x2 = transmute(vminnm_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[cfg_attr(target_arch = "arm", simd_test(enable = "neon,crc"))] - #[cfg_attr(target_arch = "aarch64", simd_test(enable = "neon"))] - #[cfg_attr(target_arch = "arm64ec", simd_test(enable = "neon"))] - unsafe fn test_vminnmq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); - let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); - let e: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0); - let r: f32x4 = 
transmute(vminnmq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpadd_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x2 = f32x2::new(3., 4.); - let e: f32x2 = f32x2::new(3., 7.); - let r: f32x2 = transmute(vpadd_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmull_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(0, 4, 12, 24); - let r: i32x4 = transmute(vqdmull_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmull_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(1, 2); - let e: i64x2 = i64x2::new(0, 4); - let r: i64x2 = transmute(vqdmull_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmull_n_s16() { - let a: i16x4 = i16x4::new(2, 4, 6, 8); - let b: i16 = 2; - let e: i32x4 = i32x4::new(8, 16, 24, 32); - let r: i32x4 = transmute(vqdmull_n_s16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmull_n_s32() { - let a: i32x2 = i32x2::new(2, 4); - let b: i32 = 2; - let e: i64x2 = i64x2::new(8, 16); - let r: i64x2 = transmute(vqdmull_n_s32(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmull_lane_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(0, 2, 2, 0); - let e: i32x4 = i32x4::new(4, 8, 12, 16); - let r: i32x4 = transmute(vqdmull_lane_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmull_lane_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(0, 2); - let e: i64x2 = i64x2::new(4, 8); - let r: i64x2 = transmute(vqdmull_lane_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlal_s16() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(5, 9, 13, 17); - let r: i32x4 = transmute(vqdmlal_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlal_s32() { - let a: i64x2 = i64x2::new(1, 1); - let b: i32x2 = i32x2::new(1, 2); - let c: i32x2 = i32x2::new(2, 2); - let e: i64x2 = i64x2::new(5, 9); - let r: i64x2 = transmute(vqdmlal_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlal_n_s16() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16 = 2; - let e: i32x4 = i32x4::new(5, 9, 13, 17); - let r: i32x4 = transmute(vqdmlal_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlal_n_s32() { - let a: i64x2 = i64x2::new(1, 1); - let b: i32x2 = i32x2::new(1, 2); - let c: i32 = 2; - let e: i64x2 = i64x2::new(5, 9); - let r: i64x2 = transmute(vqdmlal_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlal_lane_s16() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16x4 = i16x4::new(0, 2, 2, 0); - let e: i32x4 = i32x4::new(5, 10, 15, 20); - let r: i32x4 = transmute(vqdmlal_lane_s16::<2>(transmute(a), transmute(b), 
transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlal_lane_s32() { - let a: i64x2 = i64x2::new(1, 2); - let b: i32x2 = i32x2::new(1, 2); - let c: i32x2 = i32x2::new(0, 2); - let e: i64x2 = i64x2::new(5, 10); - let r: i64x2 = transmute(vqdmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsl_s16() { - let a: i32x4 = i32x4::new(3, 7, 11, 15); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(-1, -1, -1, -1); - let r: i32x4 = transmute(vqdmlsl_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsl_s32() { - let a: i64x2 = i64x2::new(3, 7); - let b: i32x2 = i32x2::new(1, 2); - let c: i32x2 = i32x2::new(2, 2); - let e: i64x2 = i64x2::new(-1, -1); - let r: i64x2 = transmute(vqdmlsl_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsl_n_s16() { - let a: i32x4 = i32x4::new(3, 7, 11, 15); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16 = 2; - let e: i32x4 = i32x4::new(-1, -1, -1, -1); - let r: i32x4 = transmute(vqdmlsl_n_s16(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsl_n_s32() { - let a: i64x2 = i64x2::new(3, 7); - let b: i32x2 = i32x2::new(1, 2); - let c: i32 = 2; - let e: i64x2 = i64x2::new(-1, -1); - let r: i64x2 = transmute(vqdmlsl_n_s32(transmute(a), transmute(b), c)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsl_lane_s16() { - let a: i32x4 = i32x4::new(3, 6, 9, 12); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16x4 = i16x4::new(0, 2, 2, 0); - let e: i32x4 = i32x4::new(-1, -2, -3, -4); - let r: i32x4 = transmute(vqdmlsl_lane_s16::<2>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmlsl_lane_s32() { - let a: i64x2 = i64x2::new(3, 6); - let b: i32x2 = i32x2::new(1, 2); - let c: i32x2 = i32x2::new(0, 2); - let e: i64x2 = i64x2::new(-1, -2); - let r: i64x2 = transmute(vqdmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vqdmulh_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vqdmulhq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vqdmulh_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(1, 1, 1, 1); - let r: i32x4 = 
transmute(vqdmulhq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_n_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16 = 2; - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vqdmulh_n_s16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_n_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32 = 2; - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vqdmulh_n_s32(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_n_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16 = 2; - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vqdmulhq_n_s16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_n_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32 = 2; - let e: i32x4 = i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vqdmulhq_n_s32(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_laneq_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1); - let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let r: i16x8 = transmute(vqdmulhq_laneq_s16::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_laneq_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1); - let e: i16x4 = i16x4::new(1, 1, 1, 1); - let r: i16x4 = transmute(vqdmulh_laneq_s16::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulhq_laneq_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x4 = i32x4::new(2, 1, 1, 1); - let e: i32x4 = i32x4::new(1, 1, 1, 1); - let r: i32x4 = transmute(vqdmulhq_laneq_s32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqdmulh_laneq_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x4 = i32x4::new(2, 1, 1, 1); - let e: i32x2 = i32x2::new(1, 1); - let r: i32x2 = transmute(vqdmulh_laneq_s32::<0>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let e: i8x8 = i8x8::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F); - let r: i8x8 = transmute(vqmovn_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let e: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let r: i16x4 = transmute(vqmovn_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_s64() { - let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF); - let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let r: i32x2 = transmute(vqmovn_s64(transmute(a))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vqmovn_u16() { - let a: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vqmovn_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_u32() { - let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vqmovn_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovn_u64() { - let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vqmovn_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovun_s16() { - let a: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vqmovun_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovun_s32() { - let a: i32x4 = i32x4::new(-1, -1, -1, -1); - let e: u16x4 = u16x4::new(0, 0, 0, 0); - let r: u16x4 = transmute(vqmovun_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqmovun_s64() { - let a: i64x2 = i64x2::new(-1, -1); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vqmovun_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(2, 2, 2, 2); - let r: i16x4 = transmute(vqrdmulh_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let r: i16x8 = transmute(vqrdmulhq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(2, 2); - let r: i32x2 = transmute(vqrdmulh_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(2, 2, 2, 2); - let r: i32x4 = transmute(vqrdmulhq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_n_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16 = 2; - let e: i16x4 = i16x4::new(2, 2, 2, 2); - let r: i16x4 = transmute(vqrdmulh_n_s16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_n_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16 = 2; - let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let r: i16x8 = transmute(vqrdmulhq_n_s16(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] 
- unsafe fn test_vqrdmulh_n_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32 = 2; - let e: i32x2 = i32x2::new(2, 2); - let r: i32x2 = transmute(vqrdmulh_n_s32(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_n_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32 = 2; - let e: i32x4 = i32x4::new(2, 2, 2, 2); - let r: i32x4 = transmute(vqrdmulhq_n_s32(transmute(a), b)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_lane_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16x4 = i16x4::new(2, 2, 2, 2); - let r: i16x4 = transmute(vqrdmulh_lane_s16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_laneq_s16() { - let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16x4 = i16x4::new(2, 2, 2, 2); - let r: i16x4 = transmute(vqrdmulh_laneq_s16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_lane_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x4 = i16x4::new(0, 2, 0, 0); - let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let r: i16x8 = transmute(vqrdmulhq_lane_s16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_laneq_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF); - let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); - let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let r: i16x8 = transmute(vqrdmulhq_laneq_s16::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_lane_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(0, 2); - let e: i32x2 = i32x2::new(2, 2); - let r: i32x2 = transmute(vqrdmulh_lane_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulh_laneq_s32() { - let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32x2 = i32x2::new(2, 2); - let r: i32x2 = transmute(vqrdmulh_laneq_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_lane_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x2 = i32x2::new(0, 2); - let e: i32x4 = i32x4::new(2, 2, 2, 2); - let r: i32x4 = transmute(vqrdmulhq_lane_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrdmulhq_laneq_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let b: i32x4 = i32x4::new(0, 2, 0, 0); - let e: i32x4 = i32x4::new(2, 2, 2, 2); - let r: i32x4 = transmute(vqrdmulhq_laneq_s32::<1>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_s8() { - let a: i8x8 = i8x8::new(2, -128, 0x7F, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(8, -128, 0x7F, 12, 16, 20, 24, 28); - let r: i8x8 = 
transmute(vqrshl_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_s8() { - let a: i8x16 = i8x16::new(2, -128, 0x7F, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(8, -128, 0x7F, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); - let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_s16() { - let a: i16x4 = i16x4::new(2, -32768, 0x7F_FF, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(8, -32768, 0x7F_FF, 12); - let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_s16() { - let a: i16x8 = i16x8::new(2, -32768, 0x7F_FF, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(8, -32768, 0x7F_FF, 12, 16, 20, 24, 28); - let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_s32() { - let a: i32x2 = i32x2::new(2, -2147483648); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(8, -2147483648); - let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_s32() { - let a: i32x4 = i32x4::new(2, -2147483648, 0x7F_FF_FF_FF, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(8, -2147483648, 0x7F_FF_FF_FF, 12); - let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_s64() { - let a: i64x1 = i64x1::new(2); - let b: i64x1 = i64x1::new(2); - let e: i64x1 = i64x1::new(8); - let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_s64() { - let a: i64x2 = i64x2::new(2, -9223372036854775808); - let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(8, -9223372036854775808); - let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_u8() { - let a: u8x8 = u8x8::new(2, 0, 0xFF, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(8, 0, 0xFF, 12, 16, 20, 24, 28); - let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_u8() { - let a: u8x16 = u8x16::new(2, 0, 0xFF, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(8, 0, 0xFF, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); - let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_u16() { - let a: u16x4 = u16x4::new(2, 0, 0xFF_FF, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(8, 0, 0xFF_FF, 12); - let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_u16() { - let a: u16x8 = u16x8::new(2, 0, 0xFF_FF, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(8, 0, 
0xFF_FF, 12, 16, 20, 24, 28); - let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_u32() { - let a: u32x2 = u32x2::new(2, 0); - let b: i32x2 = i32x2::new(2, 2); - let e: u32x2 = u32x2::new(8, 0); - let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_u32() { - let a: u32x4 = u32x4::new(2, 0, 0xFF_FF_FF_FF, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(8, 0, 0xFF_FF_FF_FF, 12); - let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshl_u64() { - let a: u64x1 = u64x1::new(2); - let b: i64x1 = i64x1::new(2); - let e: u64x1 = u64x1::new(8); - let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshlq_u64() { - let a: u64x2 = u64x2::new(2, 0); - let b: i64x2 = i64x2::new(2, 2); - let e: u64x2 = u64x2::new(8, 0); - let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_n_s16() { - let a: i16x8 = i16x8::new(-32768, 4, 8, 12, 16, 20, 24, 28); - let e: i8x8 = i8x8::new(-128, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vqrshrn_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_n_s32() { - let a: i32x4 = i32x4::new(-2147483648, 4, 8, 12); - let e: i16x4 = i16x4::new(-32768, 1, 2, 3); - let r: i16x4 = transmute(vqrshrn_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_n_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 4); - let e: i32x2 = i32x2::new(-2147483648, 1); - let r: i32x2 = transmute(vqrshrn_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_n_u16() { - let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vqrshrn_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_n_u32() { - let a: u32x4 = u32x4::new(0, 4, 8, 12); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vqrshrn_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrn_n_u64() { - let a: u64x2 = u64x2::new(0, 4); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vqrshrn_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrun_n_s16() { - let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vqrshrun_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrun_n_s32() { - let a: i32x4 = i32x4::new(0, 4, 8, 12); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vqrshrun_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshrun_n_s64() { - let a: i64x2 = i64x2::new(0, 4); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vqrshrun_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 
5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: i8x8 = transmute(vqshl_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); - let r: i8x16 = transmute(vqshlq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(0, 4, 8, 12); - let r: i16x4 = transmute(vqshl_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: i16x8 = transmute(vqshlq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_s32() { - let a: i32x2 = i32x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(0, 4); - let r: i32x2 = transmute(vqshl_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(0, 4, 8, 12); - let r: i32x4 = transmute(vqshlq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_s64() { - let a: i64x1 = i64x1::new(0); - let b: i64x1 = i64x1::new(2); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vqshl_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_s64() { - let a: i64x2 = i64x2::new(0, 1); - let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(0, 4); - let r: i64x2 = transmute(vqshlq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: u8x8 = transmute(vqshl_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); - let r: u8x16 = transmute(vqshlq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(0, 4, 8, 12); - let r: u16x4 = transmute(vqshl_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: u16x8 = transmute(vqshlq_u16(transmute(a), 
transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_u32() { - let a: u32x2 = u32x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let e: u32x2 = u32x2::new(0, 4); - let r: u32x2 = transmute(vqshl_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(0, 4, 8, 12); - let r: u32x4 = transmute(vqshlq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_u64() { - let a: u64x1 = u64x1::new(0); - let b: i64x1 = i64x1::new(2); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vqshl_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_u64() { - let a: u64x2 = u64x2::new(0, 1); - let b: i64x2 = i64x2::new(2, 2); - let e: u64x2 = u64x2::new(0, 4); - let r: u64x2 = transmute(vqshlq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x8 = i8x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: i8x8 = transmute(vqshl_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: i8x16 = i8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); - let r: i8x16 = transmute(vqshlq_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i16x4 = i16x4::new(0, 4, 8, 12); - let r: i16x4 = transmute(vqshl_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: i16x8 = transmute(vqshlq_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: i32x2 = i32x2::new(0, 4); - let r: i32x2 = transmute(vqshl_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let e: i32x4 = i32x4::new(0, 4, 8, 12); - let r: i32x4 = transmute(vqshlq_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_s64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vqshl_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x2 = i64x2::new(0, 4); - let r: i64x2 = transmute(vqshlq_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: u8x8 = transmute(vqshl_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 
52, 56, 60); - let r: u8x16 = transmute(vqshlq_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0, 4, 8, 12); - let r: u16x4 = transmute(vqshl_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: u16x8 = transmute(vqshlq_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_u32() { - let a: u32x2 = u32x2::new(0, 1); - let e: u32x2 = u32x2::new(0, 4); - let r: u32x2 = transmute(vqshl_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0, 4, 8, 12); - let r: u32x4 = transmute(vqshlq_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshl_n_u64() { - let a: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vqshl_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlq_n_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: u64x2 = u64x2::new(0, 4); - let r: u64x2 = transmute(vqshlq_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlu_n_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: u8x8 = transmute(vqshlu_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlu_n_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0, 4, 8, 12); - let r: u16x4 = transmute(vqshlu_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlu_n_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: u32x2 = u32x2::new(0, 4); - let r: u32x2 = transmute(vqshlu_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshlu_n_s64() { - let a: i64x1 = i64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vqshlu_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshluq_n_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); - let r: u8x16 = transmute(vqshluq_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshluq_n_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let r: u16x8 = transmute(vqshluq_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshluq_n_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0, 4, 8, 12); - let r: u32x4 = transmute(vqshluq_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshluq_n_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u64x2 = u64x2::new(0, 4); - let r: u64x2 = transmute(vqshluq_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_n_s16() { - let a: 
i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vqshrn_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_n_s32() { - let a: i32x4 = i32x4::new(0, 4, 8, 12); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vqshrn_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_n_s64() { - let a: i64x2 = i64x2::new(0, 4); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vqshrn_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_n_u16() { - let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vqshrn_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_n_u32() { - let a: u32x4 = u32x4::new(0, 4, 8, 12); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vqshrn_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrn_n_u64() { - let a: u64x2 = u64x2::new(0, 4); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vqshrn_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrun_n_s16() { - let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vqshrun_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrun_n_s32() { - let a: i32x4 = i32x4::new(0, 4, 8, 12); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vqshrun_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqshrun_n_s64() { - let a: i64x2 = i64x2::new(0, 4); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vqshrun_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrte_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let e: f32x2 = f32x2::new(0.998046875, 0.705078125); - let r: f32x2 = transmute(vrsqrte_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrteq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0); - let e: f32x4 = f32x4::new(0.998046875, 0.705078125, 0.576171875, 0.4990234375); - let r: f32x4 = transmute(vrsqrteq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrte_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(4294967295, 4294967295); - let r: u32x2 = transmute(vrsqrte_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrteq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(4294967295, 4294967295, 4294967295, 4294967295); - let r: u32x4 = transmute(vrsqrteq_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrts_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(1.0, 2.0); - let e: f32x2 = f32x2::new(1., -0.5); - let r: f32x2 = transmute(vrsqrts_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrtsq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0); - let b: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0); - 
let e: f32x4 = f32x4::new(1., -0.5, -3.0, -6.5); - let r: f32x4 = transmute(vrsqrtsq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpe_f32() { - let a: f32x2 = f32x2::new(4.0, 3.0); - let e: f32x2 = f32x2::new(0.24951171875, 0.3330078125); - let r: f32x2 = transmute(vrecpe_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpeq_f32() { - let a: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0); - let e: f32x4 = f32x4::new(0.24951171875, 0.3330078125, 0.4990234375, 0.998046875); - let r: f32x4 = transmute(vrecpeq_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpe_u32() { - let a: u32x2 = u32x2::new(4, 3); - let e: u32x2 = u32x2::new(4294967295, 4294967295); - let r: u32x2 = transmute(vrecpe_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpeq_u32() { - let a: u32x4 = u32x4::new(4, 3, 2, 1); - let e: u32x4 = u32x4::new(4294967295, 4294967295, 4294967295, 4294967295); - let r: u32x4 = transmute(vrecpeq_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecps_f32() { - let a: f32x2 = f32x2::new(4.0, 3.0); - let b: f32x2 = f32x2::new(4.0, 3.0); - let e: f32x2 = f32x2::new(-14., -7.); - let r: f32x2 = transmute(vrecps_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrecpsq_f32() { - let a: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0); - let b: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0); - let e: f32x4 = f32x4::new(-14., -7., -2., 1.); - let r: f32x4 = transmute(vrecpsq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vreinterpret_s8_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_p8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vreinterpret_s8_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_p16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vreinterpret_s16_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vreinterpret_s16_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_u32() { - let a: u32x2 = u32x2::new(0, 1); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vreinterpret_s32_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_u64() { - let a: u64x1 = u64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vreinterpretq_s8_u8(transmute(a))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_p8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vreinterpretq_s8_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_p16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vreinterpretq_s16_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vreinterpretq_s16_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vreinterpretq_s32_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_p8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vreinterpret_u8_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vreinterpret_u8_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_p16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vreinterpret_u16_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vreinterpret_u16_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vreinterpret_u32_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_s64() { - let a: i64x1 = i64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_p8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: u8x16 = transmute(vreinterpretq_u8_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: u8x16 = transmute(vreinterpretq_u8_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vreinterpretq_u16_p16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vreinterpretq_u16_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u16_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vreinterpretq_u16_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vreinterpretq_u32_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vreinterpret_p8_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vreinterpret_p8_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p16_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vreinterpret_p16_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p16_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vreinterpret_p16_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vreinterpretq_p8_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vreinterpretq_p8_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vreinterpretq_p16_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vreinterpretq_p16_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: i8x8 = transmute(vreinterpret_s8_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_u16() { - let a: u16x4 = u16x4::new(0, 
1, 2, 3); - let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: i8x8 = transmute(vreinterpret_s8_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_p16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: i8x8 = transmute(vreinterpret_s8_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: i16x4 = i16x4::new(0, 0, 1, 0); - let r: i16x4 = transmute(vreinterpret_s16_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s16_u32() { - let a: u32x2 = u32x2::new(0, 1); - let e: i16x4 = i16x4::new(0, 0, 1, 0); - let r: i16x4 = transmute(vreinterpret_s16_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_s64() { - let a: i64x1 = i64x1::new(0); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vreinterpret_s32_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_u64() { - let a: u64x1 = u64x1::new(0); - let e: i32x2 = i32x2::new(0, 0); - let r: i32x2 = transmute(vreinterpret_s32_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0); - let r: i8x16 = transmute(vreinterpretq_s8_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0); - let r: i8x16 = transmute(vreinterpretq_s8_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_p16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0); - let r: i8x16 = transmute(vreinterpretq_s8_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: i16x8 = transmute(vreinterpretq_s16_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s16_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: i16x8 = transmute(vreinterpretq_s16_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i32x4 = i32x4::new(0, 0, 1, 0); - let r: i32x4 = transmute(vreinterpretq_s32_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: i32x4 = i32x4::new(0, 0, 1, 0); - let r: i32x4 = transmute(vreinterpretq_s32_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_p16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: u8x8 = transmute(vreinterpret_u8_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_s16() 
{ - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: u8x8 = transmute(vreinterpret_u8_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); - let e: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: u8x8 = transmute(vreinterpret_u8_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_s32() { - let a: i32x2 = i32x2::new(0, 1); - let e: u16x4 = u16x4::new(0, 0, 1, 0); - let r: u16x4 = transmute(vreinterpret_u16_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_u32() { - let a: u32x2 = u32x2::new(0, 1); - let e: u16x4 = u16x4::new(0, 0, 1, 0); - let r: u16x4 = transmute(vreinterpret_u16_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_s64() { - let a: i64x1 = i64x1::new(0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vreinterpret_u32_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_u64() { - let a: u64x1 = u64x1::new(0); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vreinterpret_u32_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_p16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0); - let r: u8x16 = transmute(vreinterpretq_u8_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0); - let r: u8x16 = transmute(vreinterpretq_u8_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0); - let r: u8x16 = transmute(vreinterpretq_u8_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u16_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let e: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: u16x8 = transmute(vreinterpretq_u16_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u16_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let e: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: u16x8 = transmute(vreinterpretq_u16_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u32x4 = u32x4::new(0, 0, 1, 0); - let r: u32x4 = transmute(vreinterpretq_u32_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: u32x4 = u32x4::new(0, 0, 1, 0); - let r: u32x4 = transmute(vreinterpretq_u32_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_p16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); - let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0); - let r: i8x8 = transmute(vreinterpret_p8_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - 
-    unsafe fn test_vreinterpret_p8_s16() {
-        let a: i16x4 = i16x4::new(0, 1, 2, 3);
-        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let r: i8x8 = transmute(vreinterpret_p8_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p8_u16() {
-        let a: u16x4 = u16x4::new(0, 1, 2, 3);
-        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let r: i8x8 = transmute(vreinterpret_p8_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_s32() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let e: i16x4 = i16x4::new(0, 0, 1, 0);
-        let r: i16x4 = transmute(vreinterpret_p16_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_u32() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let e: i16x4 = i16x4::new(0, 0, 1, 0);
-        let r: i16x4 = transmute(vreinterpret_p16_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p8_p16() {
-        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let r: i8x16 = transmute(vreinterpretq_p8_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p8_s16() {
-        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let r: i8x16 = transmute(vreinterpretq_p8_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p8_u16() {
-        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let r: i8x16 = transmute(vreinterpretq_p8_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p16_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 2, 3);
-        let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let r: i16x8 = transmute(vreinterpretq_p16_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p16_u32() {
-        let a: u32x4 = u32x4::new(0, 1, 2, 3);
-        let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let r: i16x8 = transmute(vreinterpretq_p16_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpret_s32_p64() {
-        let a: i64x1 = i64x1::new(0);
-        let e: i32x2 = i32x2::new(0, 0);
-        let r: i32x2 = transmute(vreinterpret_s32_p64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpret_u32_p64() {
-        let a: i64x1 = i64x1::new(0);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vreinterpret_u32_p64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_s32_p64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let e: i32x4 = i32x4::new(0, 0, 1, 0);
-        let r: i32x4 = transmute(vreinterpretq_s32_p64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_u32_p64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let e: u32x4 = u32x4::new(0, 0, 1, 0);
-        let r: u32x4 = transmute(vreinterpretq_u32_p64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_s64_p128() {
-        let a: p128 = 0;
-        let e: i64x2 = i64x2::new(0, 0);
-        let r: i64x2 = transmute(vreinterpretq_s64_p128(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_u64_p128() {
-        let a: p128 = 0;
-        let e: u64x2 = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vreinterpretq_u64_p128(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p64_p128() {
-        let a: p128 = 0;
-        let e: i64x2 = i64x2::new(0, 0);
-        let r: i64x2 = transmute(vreinterpretq_p64_p128(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s16_p8() {
-        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vreinterpret_s16_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s16_s8() {
-        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vreinterpret_s16_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s16_u8() {
-        let a: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vreinterpret_s16_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s32_p16() {
-        let a: i16x4 = i16x4::new(0, 0, 1, 0);
-        let e: i32x2 = i32x2::new(0, 1);
-        let r: i32x2 = transmute(vreinterpret_s32_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s32_s16() {
-        let a: i16x4 = i16x4::new(0, 0, 1, 0);
-        let e: i32x2 = i32x2::new(0, 1);
-        let r: i32x2 = transmute(vreinterpret_s32_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s32_u16() {
-        let a: u16x4 = u16x4::new(0, 0, 1, 0);
-        let e: i32x2 = i32x2::new(0, 1);
-        let r: i32x2 = transmute(vreinterpret_s32_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s64_s32() {
-        let a: i32x2 = i32x2::new(0, 0);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vreinterpret_s64_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s64_u32() {
-        let a: u32x2 = u32x2::new(0, 0);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vreinterpret_s64_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s16_p8() {
-        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vreinterpretq_s16_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s16_s8() {
-        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vreinterpretq_s16_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s16_u8() {
-        let a: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vreinterpretq_s16_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s32_p16() {
-        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i32x4 = i32x4::new(0, 1, 2, 3);
-        let r: i32x4 = transmute(vreinterpretq_s32_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s32_s16() {
-        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i32x4 = i32x4::new(0, 1, 2, 3);
-        let r: i32x4 = transmute(vreinterpretq_s32_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s32_u16() {
-        let a: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i32x4 = i32x4::new(0, 1, 2, 3);
-        let r: i32x4 = transmute(vreinterpretq_s32_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s64_s32() {
-        let a: i32x4 = i32x4::new(0, 0, 1, 0);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vreinterpretq_s64_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s64_u32() {
-        let a: u32x4 = u32x4::new(0, 0, 1, 0);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vreinterpretq_s64_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u16_p8() {
-        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: u16x4 = u16x4::new(0, 1, 2, 3);
-        let r: u16x4 = transmute(vreinterpret_u16_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u16_s8() {
-        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: u16x4 = u16x4::new(0, 1, 2, 3);
-        let r: u16x4 = transmute(vreinterpret_u16_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u16_u8() {
-        let a: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: u16x4 = u16x4::new(0, 1, 2, 3);
-        let r: u16x4 = transmute(vreinterpret_u16_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u32_p16() {
-        let a: i16x4 = i16x4::new(0, 0, 1, 0);
-        let e: u32x2 = u32x2::new(0, 1);
-        let r: u32x2 = transmute(vreinterpret_u32_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u32_s16() {
-        let a: i16x4 = i16x4::new(0, 0, 1, 0);
-        let e: u32x2 = u32x2::new(0, 1);
-        let r: u32x2 = transmute(vreinterpret_u32_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u32_u16() {
-        let a: u16x4 = u16x4::new(0, 0, 1, 0);
-        let e: u32x2 = u32x2::new(0, 1);
-        let r: u32x2 = transmute(vreinterpret_u32_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u64_s32() {
-        let a: i32x2 = i32x2::new(0, 0);
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vreinterpret_u64_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u64_u32() {
-        let a: u32x2 = u32x2::new(0, 0);
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vreinterpret_u64_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u16_p8() {
-        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u16x8 = transmute(vreinterpretq_u16_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u16_s8() {
-        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u16x8 = transmute(vreinterpretq_u16_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u16_u8() {
-        let a: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: u16x8 = transmute(vreinterpretq_u16_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u32_p16() {
-        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: u32x4 = u32x4::new(0, 1, 2, 3);
-        let r: u32x4 = transmute(vreinterpretq_u32_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u32_s16() {
-        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: u32x4 = u32x4::new(0, 1, 2, 3);
-        let r: u32x4 = transmute(vreinterpretq_u32_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u32_u16() {
-        let a: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: u32x4 = u32x4::new(0, 1, 2, 3);
-        let r: u32x4 = transmute(vreinterpretq_u32_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u64_s32() {
-        let a: i32x4 = i32x4::new(0, 0, 1, 0);
-        let e: u64x2 = u64x2::new(0, 1);
-        let r: u64x2 = transmute(vreinterpretq_u64_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u64_u32() {
-        let a: u32x4 = u32x4::new(0, 0, 1, 0);
-        let e: u64x2 = u64x2::new(0, 1);
-        let r: u64x2 = transmute(vreinterpretq_u64_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_p8() {
-        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vreinterpret_p16_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_s8() {
-        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vreinterpret_p16_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_u8() {
-        let a: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
-        let e: i16x4 = i16x4::new(0, 1, 2, 3);
-        let r: i16x4 = transmute(vreinterpret_p16_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p16_p8() {
-        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vreinterpretq_p16_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p16_s8() {
-        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vreinterpretq_p16_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p16_u8() {
-        let a: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
-        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vreinterpretq_p16_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpret_p64_s32() {
-        let a: i32x2 = i32x2::new(0, 0);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vreinterpret_p64_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpret_p64_u32() {
-        let a: u32x2 = u32x2::new(0, 0);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vreinterpret_p64_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p64_s32() {
-        let a: i32x4 = i32x4::new(0, 0, 1, 0);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vreinterpretq_p64_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p64_u32() {
-        let a: u32x4 = u32x4::new(0, 0, 1, 0);
-        let e: i64x2 = i64x2::new(0, 1);
-        let r: i64x2 = transmute(vreinterpretq_p64_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p128_s64() {
-        let a: i64x2 = i64x2::new(0, 0);
-        let e: p128 = 0;
-        let r: p128 = vreinterpretq_p128_s64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p128_u64() {
-        let a: u64x2 = u64x2::new(0, 0);
-        let e: p128 = 0;
-        let r: p128 = vreinterpretq_p128_u64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p128_p64() {
-        let a: i64x2 = i64x2::new(0, 0);
-        let e: p128 = 0;
-        let r: p128 = vreinterpretq_p128_p64(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s8_s32() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i8x8 = transmute(vreinterpret_s8_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s8_u32() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i8x8 = transmute(vreinterpret_s8_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s16_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let e: i16x4 = i16x4::new(0, 0, 0, 0);
-        let r: i16x4 = transmute(vreinterpret_s16_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s16_u64() {
-        let a: u64x1 = u64x1::new(0);
-        let e: i16x4 = i16x4::new(0, 0, 0, 0);
-        let r: i16x4 = transmute(vreinterpret_s16_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s8_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 2, 3);
-        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_s8_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s8_u32() {
-        let a: u32x4 = u32x4::new(0, 1, 2, 3);
-        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_s8_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s16_s64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i16x8 = transmute(vreinterpretq_s16_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s16_u64() {
-        let a: u64x2 = u64x2::new(0, 1);
-        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i16x8 = transmute(vreinterpretq_s16_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u8_s32() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let e: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: u8x8 = transmute(vreinterpret_u8_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u8_u32() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let e: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: u8x8 = transmute(vreinterpret_u8_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u16_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let e: u16x4 = u16x4::new(0, 0, 0, 0);
-        let r: u16x4 = transmute(vreinterpret_u16_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u16_u64() {
-        let a: u64x1 = u64x1::new(0);
-        let e: u16x4 = u16x4::new(0, 0, 0, 0);
-        let r: u16x4 = transmute(vreinterpret_u16_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u8_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 2, 3);
-        let e: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
-        let r: u8x16 = transmute(vreinterpretq_u8_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u8_u32() {
-        let a: u32x4 = u32x4::new(0, 1, 2, 3);
-        let e: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
-        let r: u8x16 = transmute(vreinterpretq_u8_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u16_s64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: u16x8 = transmute(vreinterpretq_u16_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_u16_u64() {
-        let a: u64x2 = u64x2::new(0, 1);
-        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: u16x8 = transmute(vreinterpretq_u16_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p8_s32() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i8x8 = transmute(vreinterpret_p8_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p8_u32() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i8x8 = transmute(vreinterpret_p8_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let e: i16x4 = i16x4::new(0, 0, 0, 0);
-        let r: i16x4 = transmute(vreinterpret_p16_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_p16_u64() {
-        let a: u64x1 = u64x1::new(0);
-        let e: i16x4 = i16x4::new(0, 0, 0, 0);
-        let r: i16x4 = transmute(vreinterpret_p16_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p8_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 2, 3);
-        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_p8_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p8_u32() {
-        let a: u32x4 = u32x4::new(0, 1, 2, 3);
-        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_p8_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_p16_s64() {
-        let a: i64x2 = i64x2::new(0, 1);
-        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
-        let r: i16x8 = transmute(vreinterpretq_p16_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
#[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_s16_p64() { - let a: i64x1 = i64x1::new(0); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_s16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_u16_p64() { - let a: i64x1 = i64x1::new(0); - let e: u16x4 = u16x4::new(0, 0, 0, 0); - let r: u16x4 = transmute(vreinterpret_u16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p16_p64() { - let a: i64x1 = i64x1::new(0); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_p16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_s16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_s16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_u16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: u16x8 = transmute(vreinterpretq_u16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p16_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_s32_p128() { - let a: p128 = 0; - let e: i32x4 = i32x4::new(0, 0, 0, 0); - let r: i32x4 = transmute(vreinterpretq_s32_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_u32_p128() { - let a: p128 = 0; - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vreinterpretq_u32_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vreinterpret_s32_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vreinterpret_s32_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s32_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vreinterpret_s32_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: i64x1 = 
i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vreinterpretq_s32_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vreinterpretq_s32_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s32_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0); - let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vreinterpretq_s32_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vreinterpret_u32_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vreinterpret_u32_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vreinterpret_u32_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vreinterpretq_u32_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_s8() { - let a: i8x16 = i8x16::new(0, 
0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vreinterpretq_u32_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0); - let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vreinterpretq_u32_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p64_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p64_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p64_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p64_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p64_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p64_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p128_s32() { - let a: i32x4 = i32x4::new(0, 0, 0, 0); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_s32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p128_u32() { - let a: u32x4 = u32x4::new(0, 0, 0, 0); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_u32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_s64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_s8_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s8_u64() { - let a: u64x1 = u64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 
0); - let r: i8x8 = transmute(vreinterpret_s8_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_s64() { - let a: i64x1 = i64x1::new(0); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vreinterpret_u8_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u8_u64() { - let a: u64x1 = u64x1::new(0); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vreinterpret_u8_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_s64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_u64() { - let a: u64x1 = u64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_s8_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s8_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_s8_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_s8_p64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_s8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_u8_p64() { - let a: i64x1 = i64x1::new(0); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vreinterpret_u8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p8_p64() { - let a: i64x1 = i64x1::new(0); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_s8_p64() { - let a: 
i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_s8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_u8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p8_p64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_p64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_s16_p128() { - let a: p128 = 0; - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_s16_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_u16_p128() { - let a: p128 = 0; - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vreinterpretq_u16_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p16_p128() { - let a: p128 = 0; - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_s64_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_s64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_s64_u8() { - let 
a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_s64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: u64x2 = u64x2::new(0, 1); - let r: u64x2 = transmute(vreinterpretq_u64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p64_p8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p64_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpret_p64_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vreinterpret_p64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p64_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p64_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p64_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); - let e: i64x2 = i64x2::new(0, 1); - let r: i64x2 = transmute(vreinterpretq_p64_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p128_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_s16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p128_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_u16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p128_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_p16(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vreinterpretq_p128_s8() { - let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: p128 = 1; - let r: p128 = vreinterpretq_p128_s8(transmute(a)); - assert_eq!(r, e); 
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p128_u8() {
-        let a: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: p128 = 1;
-        let r: p128 = vreinterpretq_p128_u8(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p128_p8() {
-        let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let e: p128 = 1;
-        let r: p128 = vreinterpretq_p128_p8(transmute(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_s8_p128() {
-        let a: p128 = 1;
-        let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_s8_p128(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_u8_p128() {
-        let a: p128 = 1;
-        let e: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let r: u8x16 = transmute(vreinterpretq_u8_p128(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vreinterpretq_p8_p128() {
-        let a: p128 = 1;
-        let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_p8_p128(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s8_f32() {
-        let a: f32x2 = f32x2::new(0., 0.);
-        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        let r: i8x8 = transmute(vreinterpret_s8_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s16_f32() {
-        let a: f32x2 = f32x2::new(0., 0.);
-        let e: i16x4 = i16x4::new(0, 0, 0, 0);
-        let r: i16x4 = transmute(vreinterpret_s16_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s32_f32() {
-        let a: f32x2 = f32x2::new(0., 0.);
-        let e: i32x2 = i32x2::new(0, 0);
-        let r: i32x2 = transmute(vreinterpret_s32_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_s64_f32() {
-        let a: f32x2 = f32x2::new(0., 0.);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vreinterpret_s64_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s8_f32() {
-        let a: f32x4 = f32x4::new(0., 0., 0., 0.);
-        let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-        let r: i8x16 = transmute(vreinterpretq_s8_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s16_f32() {
-        let a: f32x4 = f32x4::new(0., 0., 0., 0.);
-        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        let r: i16x8 = transmute(vreinterpretq_s16_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s32_f32() {
-        let a: f32x4 = f32x4::new(0., 0., 0., 0.);
-        let e: i32x4 = i32x4::new(0, 0, 0, 0);
-        let r: i32x4 = transmute(vreinterpretq_s32_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpretq_s64_f32() {
-        let a: f32x4 = f32x4::new(0., 0., 0., 0.);
-        let e: i64x2 = i64x2::new(0, 0);
-        let r: i64x2 = transmute(vreinterpretq_s64_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vreinterpret_u8_f32() {
-        let a: f32x2 = f32x2::new(0., 0.);
-        let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
-        let r: u8x8 = transmute(vreinterpret_u8_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
#[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u16_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: u16x4 = u16x4::new(0, 0, 0, 0); - let r: u16x4 = transmute(vreinterpret_u16_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u32_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vreinterpret_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_u64_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vreinterpret_u64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u8_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x16 = transmute(vreinterpretq_u8_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u16_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u16x8 = transmute(vreinterpretq_u16_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u32_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: u32x4 = u32x4::new(0, 0, 0, 0); - let r: u32x4 = transmute(vreinterpretq_u32_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_u64_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: u64x2 = u64x2::new(0, 0); - let r: u64x2 = transmute(vreinterpretq_u64_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p8_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vreinterpret_p8_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_p16_f32() { - let a: f32x2 = f32x2::new(0., 0.); - let e: i16x4 = i16x4::new(0, 0, 0, 0); - let r: i16x4 = transmute(vreinterpret_p16_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p8_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let r: i8x16 = transmute(vreinterpretq_p8_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p16_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: i16x8 = transmute(vreinterpretq_p16_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_p128_f32() { - let a: f32x4 = f32x4::new(0., 0., 0., 0.); - let e: p128 = 0; - let r: p128 = vreinterpretq_p128_f32(transmute(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_s8() { - let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_s16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn 
test_vreinterpret_f32_s32() { - let a: i32x2 = i32x2::new(0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_s64() { - let a: i64x1 = i64x1::new(0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_s8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_s16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_s32() { - let a: i32x4 = i32x4::new(0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_s64() { - let a: i64x2 = i64x2::new(0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_u8() { - let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_u16() { - let a: u16x4 = u16x4::new(0, 0, 0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_u32() { - let a: u32x2 = u32x2::new(0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_u64() { - let a: u64x1 = u64x1::new(0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_u8() { - let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_u16() { - let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_u32() { - let a: u32x4 = u32x4::new(0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_u64() { - let a: u64x2 = u64x2::new(0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_p8() { 
- let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpret_f32_p16() { - let a: i16x4 = i16x4::new(0, 0, 0, 0); - let e: f32x2 = f32x2::new(0., 0.); - let r: f32x2 = transmute(vreinterpret_f32_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_p8() { - let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_p16() { - let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_p16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vreinterpretq_f32_p128() { - let a: p128 = 0; - let e: f32x4 = f32x4::new(0., 0., 0., 0.); - let r: f32x4 = transmute(vreinterpretq_f32_p128(a)); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i8x8 = transmute(vrshl_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: i8x16 = transmute(vrshlq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(4, 8, 12, 16); - let r: i16x4 = transmute(vrshl_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i16x8 = transmute(vrshlq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(4, 8); - let r: i32x2 = transmute(vrshl_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(4, 8, 12, 16); - let r: i32x4 = transmute(vrshlq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(2); - let e: i64x1 = i64x1::new(4); - let r: i64x1 = transmute(vrshl_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_s64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(4, 8); - let r: i64x2 = transmute(vrshlq_s64(transmute(a), transmute(b))); - 
assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u8x8 = transmute(vrshl_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: u8x16 = transmute(vrshlq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(4, 8, 12, 16); - let r: u16x4 = transmute(vrshl_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u16x8 = transmute(vrshlq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: i32x2 = i32x2::new(2, 2); - let e: u32x2 = u32x2::new(4, 8); - let r: u32x2 = transmute(vrshl_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(4, 8, 12, 16); - let r: u32x4 = transmute(vrshlq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshl_u64() { - let a: u64x1 = u64x1::new(1); - let b: i64x1 = i64x1::new(2); - let e: u64x1 = u64x1::new(4); - let r: u64x1 = transmute(vrshl_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshlq_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: i64x2 = i64x2::new(2, 2); - let e: u64x2 = u64x2::new(4, 8); - let r: u64x2 = transmute(vrshlq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_s8() { - let a: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vrshr_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_s8() { - let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vrshrq_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_s16() { - let a: i16x4 = i16x4::new(4, 8, 12, 16); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vrshr_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_s16() { - let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vrshrq_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_s32() { 
- let a: i32x2 = i32x2::new(4, 8); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vrshr_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_s32() { - let a: i32x4 = i32x4::new(4, 8, 12, 16); - let e: i32x4 = i32x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vrshrq_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_s64() { - let a: i64x1 = i64x1::new(4); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vrshr_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_s64() { - let a: i64x2 = i64x2::new(4, 8); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vrshrq_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_u8() { - let a: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vrshr_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_u8() { - let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: u8x16 = transmute(vrshrq_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_u16() { - let a: u16x4 = u16x4::new(4, 8, 12, 16); - let e: u16x4 = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vrshr_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_u16() { - let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vrshrq_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_u32() { - let a: u32x2 = u32x2::new(4, 8); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vrshr_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_u32() { - let a: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vrshrq_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshr_n_u64() { - let a: u64x1 = u64x1::new(4); - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vrshr_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrq_n_u64() { - let a: u64x2 = u64x2::new(4, 8); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vrshrq_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_n_s16() { - let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vrshrn_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_n_s32() { - let a: i32x4 = i32x4::new(4, 8, 12, 16); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vrshrn_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_n_s64() { - let a: i64x2 = i64x2::new(4, 8); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vrshrn_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = 
"neon")] - unsafe fn test_vrshrn_n_u16() { - let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vrshrn_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_n_u32() { - let a: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u16x4 = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vrshrn_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrshrn_n_u64() { - let a: u64x2 = u64x2::new(4, 8); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vrshrn_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i8x8 = i8x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: i8x8 = transmute(vrsra_n_s8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: i8x16 = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); - let r: i8x16 = transmute(vrsraq_n_s8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(4, 8, 12, 16); - let e: i16x4 = i16x4::new(2, 3, 4, 5); - let r: i16x4 = transmute(vrsra_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i16x8 = i16x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: i16x8 = transmute(vrsraq_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(4, 8); - let e: i32x2 = i32x2::new(2, 3); - let r: i32x2 = transmute(vrsra_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(4, 8, 12, 16); - let e: i32x4 = i32x4::new(2, 3, 4, 5); - let r: i32x4 = transmute(vrsraq_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(4); - let e: i64x1 = i64x1::new(2); - let r: i64x1 = transmute(vrsra_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_s64() { - let a: i64x2 = i64x2::new(1, 1); - let b: i64x2 = i64x2::new(4, 8); - let e: i64x2 = i64x2::new(2, 3); - let r: i64x2 = transmute(vrsraq_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u8x8 = u8x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: u8x8 = transmute(vrsra_n_u8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_u8() { - let a: 
u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: u8x16 = u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); - let r: u8x16 = transmute(vrsraq_n_u8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 1); - let b: u16x4 = u16x4::new(4, 8, 12, 16); - let e: u16x4 = u16x4::new(2, 3, 4, 5); - let r: u16x4 = transmute(vrsra_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u16x8 = u16x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: u16x8 = transmute(vrsraq_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_u32() { - let a: u32x2 = u32x2::new(1, 1); - let b: u32x2 = u32x2::new(4, 8); - let e: u32x2 = u32x2::new(2, 3); - let r: u32x2 = transmute(vrsra_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 1); - let b: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u32x4 = u32x4::new(2, 3, 4, 5); - let r: u32x4 = transmute(vrsraq_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsra_n_u64() { - let a: u64x1 = u64x1::new(1); - let b: u64x1 = u64x1::new(4); - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vrsra_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsraq_n_u64() { - let a: u64x2 = u64x2::new(1, 1); - let b: u64x2 = u64x2::new(4, 8); - let e: u64x2 = u64x2::new(2, 3); - let r: u64x2 = transmute(vrsraq_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_s16() { - let a: i16x8 = i16x8::new(0x7F_FF, -32768, 0, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(-128, -128, 0, 0, 0, 0, 0, 0); - let r: i8x8 = transmute(vrsubhn_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_s32() { - let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 0, 4); - let b: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(-32768, -32768, 0, 0); - let r: i16x4 = transmute(vrsubhn_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_s64() { - let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); - let b: i64x2 = i64x2::new(1, 2); - let e: i32x2 = i32x2::new(-2147483648, -2147483648); - let r: i32x2 = transmute(vrsubhn_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_u16() { - let a: u16x8 = u16x8::new(0xFF_FF, 0, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); - let r: u8x8 = transmute(vrsubhn_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_u32() { - let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 3, 4); - let b: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(0, 0, 0, 
0); - let r: u16x4 = transmute(vrsubhn_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsubhn_u64() { - let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); - let b: u64x2 = u64x2::new(1, 2); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vrsubhn_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_s8() { - let a: i8 = 1; - let b: i8x8 = i8x8::new(0, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vset_lane_s8::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_s16() { - let a: i16 = 1; - let b: i16x4 = i16x4::new(0, 2, 3, 4); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vset_lane_s16::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_s32() { - let a: i32 = 1; - let b: i32x2 = i32x2::new(0, 2); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vset_lane_s32::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_s64() { - let a: i64 = 1; - let b: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vset_lane_s64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_u8() { - let a: u8 = 1; - let b: u8x8 = u8x8::new(0, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vset_lane_u8::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_u16() { - let a: u16 = 1; - let b: u16x4 = u16x4::new(0, 2, 3, 4); - let e: u16x4 = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vset_lane_u16::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_u32() { - let a: u32 = 1; - let b: u32x2 = u32x2::new(0, 2); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vset_lane_u32::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_u64() { - let a: u64 = 1; - let b: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vset_lane_u64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_p8() { - let a: p8 = 1; - let b: i8x8 = i8x8::new(0, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vset_lane_p8::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_p16() { - let a: p16 = 1; - let b: i16x4 = i16x4::new(0, 2, 3, 4); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vset_lane_p16::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vset_lane_p64() { - let a: p64 = 1; - let b: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vset_lane_p64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_s8() { - let a: i8 = 1; - let b: i8x16 = i8x16::new(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vsetq_lane_s8::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - 
#[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_s16() { - let a: i16 = 1; - let b: i16x8 = i16x8::new(0, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vsetq_lane_s16::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_s32() { - let a: i32 = 1; - let b: i32x4 = i32x4::new(0, 2, 3, 4); - let e: i32x4 = i32x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vsetq_lane_s32::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_s64() { - let a: i64 = 1; - let b: i64x2 = i64x2::new(0, 2); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vsetq_lane_s64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_u8() { - let a: u8 = 1; - let b: u8x16 = u8x16::new(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: u8x16 = transmute(vsetq_lane_u8::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_u16() { - let a: u16 = 1; - let b: u16x8 = u16x8::new(0, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vsetq_lane_u16::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_u32() { - let a: u32 = 1; - let b: u32x4 = u32x4::new(0, 2, 3, 4); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vsetq_lane_u32::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_u64() { - let a: u64 = 1; - let b: u64x2 = u64x2::new(0, 2); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vsetq_lane_u64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_p8() { - let a: p8 = 1; - let b: i8x16 = i8x16::new(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 = transmute(vsetq_lane_p8::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_p16() { - let a: p16 = 1; - let b: i16x8 = i16x8::new(0, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vsetq_lane_p16::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon,aes")] - unsafe fn test_vsetq_lane_p64() { - let a: p64 = 1; - let b: i64x2 = i64x2::new(0, 2); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vsetq_lane_p64::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vset_lane_f32() { - let a: f32 = 1.; - let b: f32x2 = f32x2::new(0., 2.); - let e: f32x2 = f32x2::new(1., 2.); - let r: f32x2 = transmute(vset_lane_f32::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsetq_lane_f32() { - let a: f32 = 1.; - let b: f32x4 = f32x4::new(0., 2., 3., 4.); - let e: f32x4 = f32x4::new(1., 2., 3., 4.); - let r: f32x4 = transmute(vsetq_lane_f32::<0>(a, transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: 
i8x8 = transmute(vshl_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: i8x16 = transmute(vshlq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(4, 8, 12, 16); - let r: i16x4 = transmute(vshl_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i16x8 = transmute(vshlq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = i32x2::new(4, 8); - let r: i32x2 = transmute(vshl_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(4, 8, 12, 16); - let r: i32x4 = transmute(vshlq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(2); - let e: i64x1 = i64x1::new(4); - let r: i64x1 = transmute(vshl_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_s64() { - let a: i64x2 = i64x2::new(1, 2); - let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(4, 8); - let r: i64x2 = transmute(vshlq_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u8x8 = transmute(vshl_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: u8x16 = transmute(vshlq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(4, 8, 12, 16); - let r: u16x4 = transmute(vshl_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u16x8 = transmute(vshlq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_u32() { - let a: u32x2 = u32x2::new(1, 2); - 
let b: i32x2 = i32x2::new(2, 2); - let e: u32x2 = u32x2::new(4, 8); - let r: u32x2 = transmute(vshl_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(4, 8, 12, 16); - let r: u32x4 = transmute(vshlq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_u64() { - let a: u64x1 = u64x1::new(1); - let b: i64x1 = i64x1::new(2); - let e: u64x1 = u64x1::new(4); - let r: u64x1 = transmute(vshl_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_u64() { - let a: u64x2 = u64x2::new(1, 2); - let b: i64x2 = i64x2::new(2, 2); - let e: u64x2 = u64x2::new(4, 8); - let r: u64x2 = transmute(vshlq_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i8x8 = transmute(vshl_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: i8x16 = transmute(vshlq_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i16x4 = i16x4::new(4, 8, 12, 16); - let r: i16x4 = transmute(vshl_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i16x8 = transmute(vshlq_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: i32x2 = i32x2::new(4, 8); - let r: i32x2 = transmute(vshl_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(4, 8, 12, 16); - let r: i32x4 = transmute(vshlq_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u8x8 = transmute(vshl_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: u8x16 = transmute(vshlq_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u16x4 = u16x4::new(4, 8, 12, 16); - let r: u16x4 = transmute(vshl_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u16x8 = transmute(vshlq_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - 
- #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: u32x2 = u32x2::new(4, 8); - let r: u32x2 = transmute(vshl_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(4, 8, 12, 16); - let r: u32x4 = transmute(vshlq_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_s64() { - let a: i64x1 = i64x1::new(1); - let e: i64x1 = i64x1::new(4); - let r: i64x1 = transmute(vshl_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_s64() { - let a: i64x2 = i64x2::new(1, 2); - let e: i64x2 = i64x2::new(4, 8); - let r: i64x2 = transmute(vshlq_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshl_n_u64() { - let a: u64x1 = u64x1::new(1); - let e: u64x1 = u64x1::new(4); - let r: u64x1 = transmute(vshl_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_u64() { - let a: u64x2 = u64x2::new(1, 2); - let e: u64x2 = u64x2::new(4, 8); - let r: u64x2 = transmute(vshlq_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshll_n_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: i16x8 = transmute(vshll_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + ); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")]
- unsafe fn test_vshll_n_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let e: i32x4 = i32x4::new(4, 8, 12, 16); - let r: i32x4 = transmute(vshll_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshll_n_s32() { - let a: i32x2 = i32x2::new(1, 2); - let e: i64x2 = i64x2::new(4, 8); - let r: i64x2 = transmute(vshll_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Unzip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzpq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(uzp) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshll_n_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let r: u16x8 = transmute(vshll_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a0: float32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshll_n_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let e: u32x4 = u32x4::new(4, 8,
12, 16); - let r: u32x4 = transmute(vshll_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshll_n_u32() { - let a: u32x2 = u32x2::new(1, 2); - let e: u64x2 = u64x2::new(4, 8); - let r: u64x2 = transmute(vshll_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle!(a, b, [1, 3]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_s8() { - let a: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vshr_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a0: int8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_s8() { - let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: i8x16 =
transmute(vshrq_n_s8::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a0: int16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_s16() { - let a: i16x4 = i16x4::new(4, 8, 12, 16); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vshr_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_s16() { - let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vshrq_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_s32() { - let a: i32x2 = i32x2::new(4, 8); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vshr_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_s32() { - let a: i32x4 = i32x4::new(4, 8, 12, 16); - let e: i32x4 = i32x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vshrq_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_s64() { - let a: i64x1 = i64x1::new(4); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vshr_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_f32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a0: float32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: float32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_s64() { - let a: i64x2 = i64x2::new(4, 8); - let e: i64x2 = i64x2::new(1, 2); - let r: i64x2 = transmute(vshrq_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"]
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a0: int8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: int8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_u8() { - let a: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vshr_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a0: int16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_u8() { - let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_s32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a0: int32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_u16() { - let a: u16x4 = u16x4::new(4, 8, 12, 16); - let e: u16x4 = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vshr_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: uint8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_u16() { - let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vshrq_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_u32() { - let a: u32x2 = u32x2::new(4, 8); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vshr_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_u32)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_u32() { - let a: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u32x4 = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vshrq_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc
= "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle!( + a, + b, + [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); + let b0: poly8x16_t = simd_shuffle!( + a, + b, + [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); + transmute((a0, b0)) +} - #[simd_test(enable = "neon")] - unsafe fn test_vshr_n_u64() { - let a: u64x1 = u64x1::new(4); - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vshr_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_u64() { - let a: u64x2 = u64x2::new(4, 8); - let e: u64x2 = u64x2::new(1, 2); - let r: u64x2 = transmute(vshrq_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_n_s16() { - let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vshrn_n_s16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_n_s32() { - let a: i32x4 = i32x4::new(4, 8, 12, 16); - let e: i16x4 = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vshrn_n_s32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_n_s64() { - let a: i64x2 = i64x2::new(4, 8); - let e: i32x2 = i32x2::new(1, 2); - let r: i32x2 = transmute(vshrn_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_n_u16() { - let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vshrn_n_u16::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_n_u32() { - let a: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u16x4 = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vshrn_n_u32::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshrn_n_u64() { - let a: u64x2 = u64x2::new(4, 8); - let e: u32x2 = u32x2::new(1, 2); - let r: u32x2 = transmute(vshrn_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_s8() { - let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i8x8 = i8x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: i8x8 = transmute(vsra_n_s8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_s8() { - let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: i8x16 = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); - let r: i8x16 = transmute(vsraq_n_s8::<2>(transmute(a), 
transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_s16() { - let a: i16x4 = i16x4::new(1, 1, 1, 1); - let b: i16x4 = i16x4::new(4, 8, 12, 16); - let e: i16x4 = i16x4::new(2, 3, 4, 5); - let r: i16x4 = transmute(vsra_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_s16() { - let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: i16x8 = i16x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: i16x8 = transmute(vsraq_n_s16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_s32() { - let a: i32x2 = i32x2::new(1, 1); - let b: i32x2 = i32x2::new(4, 8); - let e: i32x2 = i32x2::new(2, 3); - let r: i32x2 = transmute(vsra_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_s32() { - let a: i32x4 = i32x4::new(1, 1, 1, 1); - let b: i32x4 = i32x4::new(4, 8, 12, 16); - let e: i32x4 = i32x4::new(2, 3, 4, 5); - let r: i32x4 = transmute(vsraq_n_s32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_s64() { - let a: i64x1 = i64x1::new(1); - let b: i64x1 = i64x1::new(4); - let e: i64x1 = i64x1::new(2); - let r: i64x1 = transmute(vsra_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_s64() { - let a: i64x2 = i64x2::new(1, 1); - let b: i64x2 = i64x2::new(4, 8); - let e: i64x2 = i64x2::new(2, 3); - let r: i64x2 = transmute(vsraq_n_s64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_u8() { - let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u8x8 = u8x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: u8x8 = transmute(vsra_n_u8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_u8() { - let a: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); - let b: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let e: u8x16 = u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); - let r: u8x16 = transmute(vsraq_n_u8::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_u16() { - let a: u16x4 = u16x4::new(1, 1, 1, 1); - let b: u16x4 = u16x4::new(4, 8, 12, 16); - let e: u16x4 = u16x4::new(2, 3, 4, 5); - let r: u16x4 = transmute(vsra_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_u16() { - let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); - let b: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); - let e: u16x8 = u16x8::new(2, 3, 4, 5, 6, 7, 8, 9); - let r: u16x8 = transmute(vsraq_n_u16::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_u32() { - let a: u32x2 = u32x2::new(1, 1); - let b: u32x2 = u32x2::new(4, 8); - let e: u32x2 = u32x2::new(2, 3); - let r: u32x2 = transmute(vsra_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_u32() { - let a: u32x4 = u32x4::new(1, 1, 1, 1); - let b: u32x4 = u32x4::new(4, 8, 12, 16); - let e: u32x4 = u32x4::new(2, 3, 4, 
5); - let r: u32x4 = transmute(vsraq_n_u32::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsra_n_u64() { - let a: u64x1 = u64x1::new(1); - let b: u64x1 = u64x1::new(4); - let e: u64x1 = u64x1::new(2); - let r: u64x1 = transmute(vsra_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vsraq_n_u64() { - let a: u64x2 = u64x2::new(1, 1); - let b: u64x2 = u64x2::new(4, 8); - let e: u64x2 = u64x2::new(2, 3); - let r: u64x2 = transmute(vsraq_n_u64::<2>(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_s8() { - let a: i8x8 = i8x8::new(0, 2, 2, 6, 2, 10, 6, 14); - let b: i8x8 = i8x8::new(1, 3, 3, 7, 3, 1, 7, 15); - let e: [i8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; - let r: [i8; 16] = transmute(vtrn_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_s16() { - let a: i16x4 = i16x4::new(0, 2, 2, 6); - let b: i16x4 = i16x4::new(1, 3, 3, 7); - let e: [i16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; - let r: [i16; 8] = transmute(vtrn_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_s8() { - let a: i8x16 = i8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); - let b: i8x16 = i8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); - let e: [i8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; - let r: [i8; 32] = transmute(vtrnq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_s16() { - let a: i16x8 = i16x8::new(0, 2, 2, 6, 2, 10, 6, 14); - let b: i16x8 = i16x8::new(1, 3, 3, 7, 3, 1, 7, 15); - let e: [i16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; - let r: [i16; 16] = transmute(vtrnq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_s32() { - let a: i32x4 = i32x4::new(0, 2, 2, 6); - let b: i32x4 = i32x4::new(1, 3, 3, 7); - let e: [i32; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; - let r: [i32; 8] = transmute(vtrnq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_u8() { - let a: u8x8 = u8x8::new(0, 2, 2, 6, 2, 10, 6, 14); - let b: u8x8 = u8x8::new(1, 3, 3, 7, 3, 1, 7, 15); - let e: [u8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; - let r: [u8; 16] = transmute(vtrn_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_u16() { - let a: u16x4 = u16x4::new(0, 2, 2, 6); - let b: u16x4 = u16x4::new(1, 3, 3, 7); - let e: [u16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; - let r: [u16; 8] = transmute(vtrn_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_u8() { - let a: u8x16 = u8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); - let b: u8x16 = u8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); - let e: [u8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; - let r: [u8; 32] = transmute(vtrnq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_u16() { - let a: u16x8 = u16x8::new(0, 2, 2, 6, 2, 10, 6, 14); - let b: u16x8 = 
u16x8::new(1, 3, 3, 7, 3, 1, 7, 15); - let e: [u16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; - let r: [u16; 16] = transmute(vtrnq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_u32() { - let a: u32x4 = u32x4::new(0, 2, 2, 6); - let b: u32x4 = u32x4::new(1, 3, 3, 7); - let e: [u32; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; - let r: [u32; 8] = transmute(vtrnq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_p8() { - let a: i8x8 = i8x8::new(0, 2, 2, 6, 2, 10, 6, 14); - let b: i8x8 = i8x8::new(1, 3, 3, 7, 3, 1, 7, 15); - let e: [u8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; - let r: [u8; 16] = transmute(vtrn_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_p16() { - let a: i16x4 = i16x4::new(0, 2, 2, 6); - let b: i16x4 = i16x4::new(1, 3, 3, 7); - let e: [u16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; - let r: [u16; 8] = transmute(vtrn_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_p8() { - let a: i8x16 = i8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); - let b: i8x16 = i8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); - let e: [u8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; - let r: [u8; 32] = transmute(vtrnq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_p16() { - let a: i16x8 = i16x8::new(0, 2, 2, 6, 2, 10, 6, 14); - let b: i16x8 = i16x8::new(1, 3, 3, 7, 3, 1, 7, 15); - let e: [u16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; - let r: [u16; 16] = transmute(vtrnq_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_s32() { - let a: i32x2 = i32x2::new(0, 2); - let b: i32x2 = i32x2::new(1, 3); - let e: [i32; 4] = [0, 1, 2, 3]; - let r: [i32; 4] = transmute(vtrn_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_u32() { - let a: u32x2 = u32x2::new(0, 2); - let b: u32x2 = u32x2::new(1, 3); - let e: [u32; 4] = [0, 1, 2, 3]; - let r: [u32; 4] = transmute(vtrn_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrn_f32() { - let a: f32x2 = f32x2::new(0., 2.); - let b: f32x2 = f32x2::new(1., 3.); - let e: [f32; 4] = [0., 1., 2., 3.]; - let r: [f32; 4] = transmute(vtrn_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vtrnq_f32() { - let a: f32x4 = f32x4::new(0., 2., 2., 6.); - let b: f32x4 = f32x4::new(1., 3., 3., 7.); - let e: [f32; 8] = [0., 1., 2., 3., 2., 3., 6., 7.]; - let r: [f32; 8] = transmute(vtrnq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_s8() { - let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [i8; 16] = transmute(vzip_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_s16() { - let a: i16x4 = i16x4::new(0, 2, 4, 6); - let b: i16x4 = i16x4::new(1, 3, 5, 7); - let e: [i16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [i16; 8] 
= transmute(vzip_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_u8() { - let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [u8; 16] = transmute(vzip_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_u16() { - let a: u16x4 = u16x4::new(0, 2, 4, 6); - let b: u16x4 = u16x4::new(1, 3, 5, 7); - let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [u16; 8] = transmute(vzip_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_p8() { - let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [u8; 16] = transmute(vzip_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_p16() { - let a: i16x4 = i16x4::new(0, 2, 4, 6); - let b: i16x4 = i16x4::new(1, 3, 5, 7); - let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [u16; 8] = transmute(vzip_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_s32() { - let a: i32x2 = i32x2::new(0, 2); - let b: i32x2 = i32x2::new(1, 3); - let e: [i32; 4] = [0, 1, 2, 3]; - let r: [i32; 4] = transmute(vzip_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_u32() { - let a: u32x2 = u32x2::new(0, 2); - let b: u32x2 = u32x2::new(1, 3); - let e: [u32; 4] = [0, 1, 2, 3]; - let r: [u32; 4] = transmute(vzip_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_s8() { - let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - let e: [i8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; - let r: [i8; 32] = transmute(vzipq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_s16() { - let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [i16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [i16; 16] = transmute(vzipq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_s32() { - let a: i32x4 = i32x4::new(0, 2, 4, 6); - let b: i32x4 = i32x4::new(1, 3, 5, 7); - let e: [i32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [i32; 8] = transmute(vzipq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_u8() { - let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - let e: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; - let r: [u8; 32] = transmute(vzipq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_u16() { - let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let 
b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [u16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [u16; 16] = transmute(vzipq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_u32() { - let a: u32x4 = u32x4::new(0, 2, 4, 6); - let b: u32x4 = u32x4::new(1, 3, 5, 7); - let e: [u32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let r: [u32; 8] = transmute(vzipq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_p8() { - let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - let e: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; - let r: [u8; 32] = transmute(vzipq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_p16() { - let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14); - let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15); - let e: [u16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - let r: [u16; 16] = transmute(vzipq_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzip_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x2 = f32x2::new(5., 6.); - let e: [f32; 4] = [1., 5., 2., 6.]; - let r: [f32; 4] = transmute(vzip_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vzipq_f32() { - let a: f32x4 = f32x4::new(1., 2., 3., 4.); - let b: f32x4 = f32x4::new(5., 6., 7., 8.); - let e: [f32; 8] = [1., 5., 2., 6., 3., 7., 4., 8.]; - let r: [f32; 8] = transmute(vzipq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_s8() { - let a: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 3, 8); - let b: i8x8 = i8x8::new(2, 3, 3, 8, 3, 15, 8, 16); - let e: [i8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; - let r: [i8; 16] = transmute(vuzp_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_s16() { - let a: i16x4 = i16x4::new(1, 2, 2, 3); - let b: i16x4 = i16x4::new(2, 3, 3, 8); - let e: [i16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; - let r: [i16; 8] = transmute(vuzp_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_s8() { - let a: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); - let b: i8x16 = i8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); - let e: [i8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; - let r: [i8; 32] = transmute(vuzpq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_s16() { - let a: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 3, 8); - let b: i16x8 = i16x8::new(2, 3, 3, 8, 3, 15, 8, 16); - let e: [i16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; - let r: [i16; 16] = transmute(vuzpq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_s32() { - let a: i32x4 = i32x4::new(1, 2, 2, 3); - let b: i32x4 = i32x4::new(2, 3, 3, 8); - let e: [i32; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; - let r: [i32; 8] = 
transmute(vuzpq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_u8() { - let a: u8x8 = u8x8::new(1, 2, 2, 3, 2, 3, 3, 8); - let b: u8x8 = u8x8::new(2, 3, 3, 8, 3, 15, 8, 16); - let e: [u8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; - let r: [u8; 16] = transmute(vuzp_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_u16() { - let a: u16x4 = u16x4::new(1, 2, 2, 3); - let b: u16x4 = u16x4::new(2, 3, 3, 8); - let e: [u16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; - let r: [u16; 8] = transmute(vuzp_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_u8() { - let a: u8x16 = u8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); - let b: u8x16 = u8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); - let e: [u8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; - let r: [u8; 32] = transmute(vuzpq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_u16() { - let a: u16x8 = u16x8::new(1, 2, 2, 3, 2, 3, 3, 8); - let b: u16x8 = u16x8::new(2, 3, 3, 8, 3, 15, 8, 16); - let e: [u16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; - let r: [u16; 16] = transmute(vuzpq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_u32() { - let a: u32x4 = u32x4::new(1, 2, 2, 3); - let b: u32x4 = u32x4::new(2, 3, 3, 8); - let e: [u32; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; - let r: [u32; 8] = transmute(vuzpq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_p8() { - let a: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 3, 8); - let b: i8x8 = i8x8::new(2, 3, 3, 8, 3, 15, 8, 16); - let e: [u8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; - let r: [u8; 16] = transmute(vuzp_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_p16() { - let a: i16x4 = i16x4::new(1, 2, 2, 3); - let b: i16x4 = i16x4::new(2, 3, 3, 8); - let e: [u16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; - let r: [u16; 8] = transmute(vuzp_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_p8() { - let a: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); - let b: i8x16 = i8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); - let e: [u8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; - let r: [u8; 32] = transmute(vuzpq_p8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_p16() { - let a: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 3, 8); - let b: i16x8 = i16x8::new(2, 3, 3, 8, 3, 15, 8, 16); - let e: [u16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; - let r: [u16; 16] = transmute(vuzpq_p16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(2, 3); - let e: [i32; 4] = [1, 2, 2, 3]; - let r: [i32; 4] = transmute(vuzp_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = 
u32x2::new(2, 3); - let e: [u32; 4] = [1, 2, 2, 3]; - let r: [u32; 4] = transmute(vuzp_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzp_f32() { - let a: f32x2 = f32x2::new(1., 2.); - let b: f32x2 = f32x2::new(2., 6.); - let e: [f32; 4] = [1., 2., 2., 6.]; - let r: [f32; 4] = transmute(vuzp_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vuzpq_f32() { - let a: f32x4 = f32x4::new(1., 2., 2., 4.); - let b: f32x4 = f32x4::new(2., 6., 6., 8.); - let e: [f32; 8] = [1., 2., 2., 6., 2., 4., 6., 8.]; - let r: [f32; 8] = transmute(vuzpq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabal_u8() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let c: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let r: u16x8 = transmute(vabal_u8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabal_u16() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(1, 2, 3, 4); - let c: u16x4 = u16x4::new(10, 10, 10, 10); - let e: u32x4 = u32x4::new(10, 10, 10, 10); - let r: u32x4 = transmute(vabal_u16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabal_u32() { - let a: u64x2 = u64x2::new(1, 2); - let b: u32x2 = u32x2::new(1, 2); - let c: u32x2 = u32x2::new(10, 10); - let e: u64x2 = u64x2::new(10, 10); - let r: u64x2 = transmute(vabal_u32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabal_s8() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let c: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let r: i16x8 = transmute(vabal_s8(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabal_s16() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(1, 2, 3, 4); - let c: i16x4 = i16x4::new(10, 10, 10, 10); - let e: i32x4 = i32x4::new(10, 10, 10, 10); - let r: i32x4 = transmute(vabal_s16(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vabal_s32() { - let a: i64x2 = i64x2::new(1, 2); - let b: i32x2 = i32x2::new(1, 2); - let c: i32x2 = i32x2::new(10, 10); - let e: i64x2 = i64x2::new(10, 10); - let r: i64x2 = transmute(vabal_s32(transmute(a), transmute(b), transmute(c))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqabs_s8() { - let a: i8x8 = i8x8::new(-128, 0x7F, -6, -5, -4, -3, -2, -1); - let e: i8x8 = i8x8::new(0x7F, 0x7F, 6, 5, 4, 3, 2, 1); - let r: i8x8 = transmute(vqabs_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqabsq_s8() { - let a: i8x16 = i8x16::new(-128, 0x7F, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5); - let e: i8x16 = i8x16::new(0x7F, 0x7F, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5); - let r: i8x16 = transmute(vqabsq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqabs_s16() { - let a: i16x4 = i16x4::new(-32768, 0x7F_FF, -6, -5); - let e: i16x4 = i16x4::new(0x7F_FF, 
0x7F_FF, 6, 5); - let r: i16x4 = transmute(vqabs_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqabsq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x7F_FF, -6, -5, -4, -3, -2, -1); - let e: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 6, 5, 4, 3, 2, 1); - let r: i16x8 = transmute(vqabsq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqabs_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); - let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); - let r: i32x2 = transmute(vqabs_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqabsq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, -6, -5); - let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 6, 5); - let r: i32x4 = transmute(vqabsq_s32(transmute(a))); - assert_eq!(r, e); - } +#[doc = "Zip vectors"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzipq_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(zip) +)] +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) } diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs index 19bdca228b..33065b0b9d 100644 --- a/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -11,6 +11,142 @@ use crate::{core_arch::simd::*, hint::unreachable_unchecked, intrinsics::simd::* #[cfg(test)] use stdarch_test::assert_instr; +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub trait AsUnsigned { + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + type Unsigned: ?Sized; + + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + unsafe fn as_unsigned(self) -> Self::Unsigned; +} + +#[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub trait AsSigned { + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + type Signed: ?Sized; + + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + 
#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + unsafe fn as_signed(self) -> Self::Signed; +} + +macro_rules! impl_sign_conversions { + ($(($signed:ty, $unsigned:ty))*) => ($( + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + impl AsUnsigned for $signed { + type Unsigned = $unsigned; + + #[inline] + unsafe fn as_unsigned(self) -> $unsigned { + crate::mem::transmute(self) + } + } + + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + impl AsSigned for $unsigned { + type Signed = $signed; + + #[inline] + unsafe fn as_signed(self) -> $signed { + crate::mem::transmute(self) + } + } + )*) +} + +macro_rules! impl_sign_conversions_neon { + ($(($signed:ty, $unsigned:ty))*) => ($( + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + impl AsUnsigned for $signed { + type Unsigned = $unsigned; + + #[inline] + unsafe fn as_unsigned(self) -> $unsigned { + crate::mem::transmute(self) + } + } + + #[cfg_attr( + not(target_arch = "arm"), + stable(feature = "neon_intrinsics", since = "1.59.0") + )] + #[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") + )] + impl AsSigned for $unsigned { + type Signed = $signed; + + #[inline] + unsafe fn as_signed(self) -> $signed { + crate::mem::transmute(self) + } + } + )*) +} + pub(crate) type p8 = u8; pub(crate) type p16 = u16; pub(crate) type p64 = u64; @@ -1033,6 +1169,88 @@ pub struct poly64x2x4_t( pub poly64x2_t, ); +impl_sign_conversions! { + (i8, u8) + (i16, u16) + (i32, u32) + (i64, u64) + (*const i8, *const u8) + (*const i16, *const u16) + (*const i32, *const u32) + (*const i64, *const u64) + (*mut i8, *mut u8) + (*mut i16, *mut u16) + (*mut i32, *mut u32) + (*mut i64, *mut u64) + (int16x4_t, uint16x4_t) + (int16x8_t, uint16x8_t) + (int32x2_t, uint32x2_t) + (int32x4_t, uint32x4_t) + (int64x1_t, uint64x1_t) + (int64x2_t, uint64x2_t) + (int8x16_t, uint8x16_t) + (int8x8_t, uint8x8_t) + (uint16x4_t, int16x4_t) + (uint16x8_t, int16x8_t) + (uint32x2_t, int32x2_t) + (uint32x4_t, int32x4_t) + (uint64x1_t, int64x1_t) + (uint64x2_t, int64x2_t) + (uint8x16_t, int8x16_t) + (uint8x8_t, int8x8_t) +} + +impl_sign_conversions_neon! 
{ + (int16x4x2_t, uint16x4x2_t) + (int16x4x3_t, uint16x4x3_t) + (int16x4x4_t, uint16x4x4_t) + (int16x8x2_t, uint16x8x2_t) + (int16x8x3_t, uint16x8x3_t) + (int16x8x4_t, uint16x8x4_t) + (int32x2x2_t, uint32x2x2_t) + (int32x2x3_t, uint32x2x3_t) + (int32x2x4_t, uint32x2x4_t) + (int32x4x2_t, uint32x4x2_t) + (int32x4x3_t, uint32x4x3_t) + (int32x4x4_t, uint32x4x4_t) + (int64x1x2_t, uint64x1x2_t) + (int64x1x3_t, uint64x1x3_t) + (int64x1x4_t, uint64x1x4_t) + (int64x2x2_t, uint64x2x2_t) + (int64x2x3_t, uint64x2x3_t) + (int64x2x4_t, uint64x2x4_t) + (int8x16x2_t, uint8x16x2_t) + (int8x16x3_t, uint8x16x3_t) + (int8x16x4_t, uint8x16x4_t) + (int8x8x2_t, uint8x8x2_t) + (int8x8x3_t, uint8x8x3_t) + (int8x8x4_t, uint8x8x4_t) + (uint16x4x2_t, int16x4x2_t) + (uint16x4x3_t, int16x4x3_t) + (uint16x4x4_t, int16x4x4_t) + (uint16x8x2_t, int16x8x2_t) + (uint16x8x3_t, int16x8x3_t) + (uint16x8x4_t, int16x8x4_t) + (uint32x2x2_t, int32x2x2_t) + (uint32x2x3_t, int32x2x3_t) + (uint32x2x4_t, int32x2x4_t) + (uint32x4x2_t, int32x4x2_t) + (uint32x4x3_t, int32x4x3_t) + (uint32x4x4_t, int32x4x4_t) + (uint64x1x2_t, int64x1x2_t) + (uint64x1x3_t, int64x1x3_t) + (uint64x1x4_t, int64x1x4_t) + (uint64x2x2_t, int64x2x2_t) + (uint64x2x3_t, int64x2x3_t) + (uint64x2x4_t, int64x2x4_t) + (uint8x16x2_t, int8x16x2_t) + (uint8x16x3_t, int8x16x3_t) + (uint8x16x4_t, int8x16x4_t) + (uint8x8x2_t, int8x8x2_t) + (uint8x8x3_t, int8x8x3_t) + (uint8x8x4_t, int8x8x4_t) +} + #[allow(improper_ctypes)] extern "unadjusted" { // absolute value (64-bit) diff --git a/crates/stdarch-gen2/spec/neon/aarch64.spec.yml b/crates/stdarch-gen2/spec/neon/aarch64.spec.yml new file mode 100644 index 0000000000..fd704c305e --- /dev/null +++ b/crates/stdarch-gen2/spec/neon/aarch64.spec.yml @@ -0,0 +1,9852 @@ +arch_cfgs: + - arch_name: aarch64 + target_feature: [neon] + llvm_prefix: llvm.aarch64.neon + +# Repeatedly used anchors +# #[stable(feature = "neon_intrinsics", since = "1.59.0")] +neon-stable: &neon-stable + FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + +# #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +neon-unstable: &neon-unstable + FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + +# #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +neon-v7: &neon-v7 + FnCall: [cfg_attr, ['target_arch = "arm"', { FnCall: [target_feature, [ 'enable = "v7"']]} ]] + +# #[target_feature(enable = "neon,v7")] +enable-v7: &enable-v7 + FnCall: [target_feature, ['enable = "neon,v7"']] + +# #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +neon-v8: &neon-v8 + FnCall: [cfg_attr, ['target_arch = "arm"', { FnCall: [target_feature, [ 'enable = "v8"']]} ]] + +target-is-arm: &target-is-arm + FnCall: [cfg, ['target_arch = "arm"']] + +# #[cfg(not(target_arch = "arm"))] +target-not-arm: &target-not-arm + FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]] + +neon-target-aarch64-arm64ec: &neon-target-aarch64-arm64ec + FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]] + +# #[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] +neon-stable-not-arm: &neon-stable-not-arm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, *neon-stable]] + +# #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +neon-unstable-is-arm: &neon-unstable-is-arm + FnCall: [ cfg_attr, ['target_arch = "arm"', *neon-unstable]] + 
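+# These anchors are spliced into the intrinsic definitions below by YAML +# reference; e.g. `attr: [*neon-stable]` on an intrinsic expands to the +# #[stable(feature = "neon_intrinsics", since = "1.59.0")] attribute shown +# above, so shared stability and target-feature gates are written only once. + 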
"msvc"))] +msvc-disabled: &msvc-disabled + FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]] + +# all(test, target_arch = "arm") +test-is-arm: &test-is-arm + FnCall: [all, [test, 'target_arch = "arm"']] + +# #[target_feature(enable = "neon,aes")] +neon-aes: &neon-aes + FnCall: [target_feature, ['enable = "neon,aes"']] + +# #[target_feature(enable = "neon,i8mm")] +neon-i8mm: &neon-i8mm + FnCall: [target_feature, ['enable = "neon,i8mm"']] + +#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] +neon-unstable-i8mm: &neon-unstable-i8mm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] + +# #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +neon-unstable-fcma: &neon-unstable-fcma + FnCall: [unstable, ['feature = "stdarch_neon_fcma"', 'issue = "117222"']] + +intrinsics: + - name: "vaddd_{type}" + doc: Add + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - i64 + - u64 + compose: + - MethodCall: + - a + - wrapping_add + - - b + + - name: "veor3{neon_type.no}" + doc: Three-way exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [eor3] + safety: + unsafe: [neon] + types: + - int8x16_t + - int16x8_t + - int32x4_t + - int64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.eor3s.{neon_type}" + links: + - link: "llvm.aarch64.crypto.eor3s.{neon_type}" + arch: aarch64,arm64ec + + - name: "veor3{neon_type.no}" + doc: Three-way exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [eor3] + safety: + unsafe: [neon] + types: + - uint8x16_t + - uint16x8_t + - uint32x4_t + - uint64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.eor3u.{neon_type}" + links: + - link: "llvm.aarch64.crypto.eor3u.{neon_type}" + arch: aarch64,arm64ec + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments of Floating + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fabd] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fabd.{neon_type}" + links: + - link: "llvm.aarch64.neon.fabd.{neon_type}" + arch: aarch64,arm64ec + + - name: "vabd{type[0]}" + doc: "Floating-point absolute difference" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fabd] + safety: + unsafe: [neon] + types: + - ['s_f32', 'f32'] + - ['d_f64', 'f64'] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vabd_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - 0 + + - name: "vabdl_high{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [sabdl] + safety: + unsafe: [neon] + types: + - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - a + - a + - [8, 9, 10, 11, 12, 13, 14, 15] + - Let: + - d + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - b + - b + - [8, 9, 10, 11, 12, 13, 14, 15] + - Let: + - e + - "{neon_type[3]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d + - FnCall: + - simd_cast + - - e + + - name: "vabdl_high{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [sabdl] + safety: + unsafe: [neon] + types: + - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - a + - a + - [4, 5, 6, 7] + - Let: + - d + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - b + - b + - [4, 5, 6, 7] + - Let: + - e + - "{neon_type[3]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d + - FnCall: + - simd_cast + - - e + + - name: "vabdl_high{neon_type[0].noq}" + doc: Signed Absolute difference Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [sabdl] + safety: + unsafe: [neon] + types: + - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! + - - a + - a + - [2, 3] + - Let: + - d + - "{neon_type[2]}" + - FnCall: + - simd_shuffle! 
+ - - b + - b + - [2, 3] + - Let: + - e + - "{neon_type[3]}" + - FnCall: + - simd_cast + - - FnCall: + - "vabd_{neon_type[0]}" + - - c + - d + - FnCall: + - simd_cast + - - e + + - name: "vceq{neon_type[0].no}" + doc: "Compare bitwise Equal (vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint64x1_t, uint64x1_t] + - [uint64x2_t, uint64x2_t] + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + - [poly64x1_t, uint64x1_t] + - [poly64x2_t, uint64x2_t] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vceq{neon_type[0].no}" + doc: "Floating-point compare equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vceq{type[0]}" + doc: "Floating-point compare equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceq_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vceqd_{type[0]}" + doc: "Compare bitwise equal" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64", "s64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vceq_{type[2]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vtst{neon_type[0].no}" + doc: "Signed compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)'] + - [poly64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)'] + - [poly64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[2]}", "{type[3]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vtstd_{type[0]}" + doc: "Compare bitwise test bits nonzero" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tst]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64", "s64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vtst_{type[2]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vuqadd{type[0]}" + doc: "Signed saturating accumulate of 
unsigned value" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_s32", "i32", "u32"] + - ["d_s64", "i64", "u64"] + compose: + - LLVMLink: + name: "vuqadd{type[0]}" + links: + - link: "llvm.aarch64.neon.suqadd.{type[1]}" + arch: aarch64,arm64ec + + - name: "vuqadd{type[0]}" + doc: "Signed saturating accumulate of unsigned value" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["b_s8", "i8", "u8", "s8"] + - ["h_s16", "i16", "u16", "s16"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vuqadd_{type[3]}" + - - FnCall: ["vdup_n_{type[3]}", [a]] + - FnCall: ["vdup_n_{type[2]}", [b]] + - '0' + + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fabs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vcgt{neon_type[0].no}" + doc: "Compare signed greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type.no}" + doc: "Compare unsigned greater than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{type[0]}" + doc: "Floating-point compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - 'simd_extract!' 
+ - - FnCall: + - "vcgt_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vclt{neon_type[0].no}" + doc: "Compare signed less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Compare signed less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcle{type[0]}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcle_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vcge{neon_type[0].no}" + doc: "Compare signed greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int64x1_t, uint64x1_t] + - [int64x2_t, uint64x2_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcgez{neon_type[0].no}" + doc: "Compare signed greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_ge, [a, {FnCall: [transmute, [b]]}]] + + - name: "vcgezd_s64" + doc: "Compare signed greater than or equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vcgez_s64 + - - FnCall: [transmute, [a]] + + - name: "vclez{neon_type[0].no}" + doc: "Compare signed less than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmle]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + + - name: "vclez{neon_type[0].no}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmle]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, 
"{type[2]}", "{type[3]}"] + - FnCall: + - simd_le + - - a + - FnCall: [transmute, [b]] + + - name: "vclez{type[0]}" + doc: "Floating-point compare less than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcltz{neon_type[0].no}" + doc: "Compare signed less than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmlt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + + - name: "vcltz{neon_type[0].no}" + doc: "Floating-point compare less than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmlt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_lt + - - a + - FnCall: [transmute, [b]] + + - name: "vcltz{type[0]}" + doc: "Floating-point compare less than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! 
+ - - FnCall: + - "vcltz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcltzd_s64" + doc: "Compare less than zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [asr]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vcltz_s64 + - - FnCall: [transmute, [a]] + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcagt{type[0]}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32", i32] + - ["d_f64", "f64", "u64", i64] + compose: + - LLVMLink: + name: "vcagt{type[0]}" + links: + - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcage{type[0]}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32", i32] + - ["d_f64", "f64", "u64", i64] + compose: + - LLVMLink: + name: "vcage{type[0]}" + links: + - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}" + arch: aarch64,arm64ec + + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + + - name: "vcalt{type[0]}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + 
unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: ["vcagt{type[0]}", [b, a]] + + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + - name: "vcale{type[0]}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: ["vcage{type[0]}", [b, a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int64x1_t, float64x1_t] + - [int64x2_t, float64x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{type[0]}_{type[3]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "i32", "f32", s32] + - ["d_f64", "i64", "f64", s64] + compose: + - Identifier: ["a as {type[2]}", Symbol] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint64x1_t, float64x1_t] + - [uint64x2_t, float64x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{type[2]}_{type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["u32", "f32", "s_f32"] + - ["u64", "f64", "d_f64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int64x1_t, float64x1_t] + - [int64x2_t, float64x2_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: 
"llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i32", "f32", 's_n_f32_s32', 'N >= 1 && N <= 32'] + - ["i64", "f64", 'd_n_f64_s64', 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint64x1_t, float64x1_t] + - [uint64x2_t, float64x2_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["u32", "f32", 's_n_f32_u32', 'N >= 1 && N <= 32'] + - ["u64", "f64", 'd_n_f64_u64', 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: + - "a: {type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", ["a.as_signed()", N]] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", "s_s32_f32", "32"] + - ["f64", "i64", "d_s64_f64", "64"] + compose: + - Identifier: ["a as i{type[3]}", Symbol] + + - name: "vcvt{type[2]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", "s_u32_f32"] + - ["f64", "u64", "d_u64_f64"] + compose: + - Identifier: ["a as {type[1]}", Symbol] + + - name: "vcvt_f64_f32" + doc: "Floating-point convert to higher 
precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, float64x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_high_f64_f32" + doc: "Floating-point convert to higher precision long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x4_t, float64x2_t] + compose: + - Let: + - b + - float32x2_t + - FnCall: + - simd_shuffle! + - - a + - a + - '[2, 3]' + - FnCall: [simd_cast, [b]] + + - name: "vcvt_f32_f64" + doc: "Floating-point convert to lower precision narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x2_t, float32x2_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt_high_f32_f64" + doc: "Floating-point convert to lower precision narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, float64x2_t, float32x4_t] + compose: + - FnCall: + - simd_shuffle! + - - a + - FnCall: [simd_cast, [b]] + - '[0, 1, 2, 3]' + + - name: "vcvtx_f32_f64" + doc: "Floating-point convert to lower precision narrow, rounding to odd" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x2_t, float32x2_t] + compose: + - LLVMLink: + name: "vcvtx_f32_f64" + links: + - link: "llvm.aarch64.neon.fcvtxn.v2f32.v2f64" + arch: aarch64,arm64ec + + - name: "vcvtxd_f32_f64" + doc: "Floating-point convert to lower precision narrow, rounding to odd" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f64", "f32"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - vcvtx_f32_f64 + - - FnCall: [vdupq_n_f64, [a]] + - '0' + + - name: "vcvtx_high_f32_f64" + doc: "Floating-point convert to lower precision narrow, rounding to odd" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, float64x2_t, float32x4_t] + compose: + - FnCall: + - simd_shuffle! 
+ - - a + - FnCall: [vcvtx_f32_f64, [b]] + - '[0, 1, 2, 3]' + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float64x1_t, int64x1_t, _n_s64_f64, '64'] + - [float64x2_t, int64x2_t, q_n_s64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["f32", "i32", s_n_s32_f32, '32'] + - ["f64", "i64", d_n_s64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t, _n_u64_f64, '64'] + - [float64x2_t, uint64x2_t, q_n_u64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["f32", "u32", s_n_u32_f32, '32'] + - ["f64", "u64", d_n_u64_f64, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[3]}']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t, _s32_f32] + - [float32x4_t, int32x4_t, q_s32_f32] + - [float64x1_t, int64x1_t, _s64_f64] + - [float64x2_t, int64x2_t, q_s64_f64] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvta{type[2]}" + doc: "Floating-point convert to integer, rounding to nearest with ties to away" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", 's_u32_f32'] + - ["f64", "u64", 'd_u64_f64'] + compose: + - LLVMLink: + name: "vcvta{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}" + doc: "Floating-point convert to signed integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtn{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}" + doc: "Floating-point convert to signed integer, rounding toward minus infinity" + arguments: ["a: 
{type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtm{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed integer, rounding toward plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}" + doc: "Floating-point convert to signed integer, rounding toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "i32", 's_s32_f32'] + - ["f64", "i64", 'd_s64_f64'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvtn{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtn{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", 's_u32_f32'] + - ["f64", "u64", 'd_u64_f64'] + compose: + - LLVMLink: + name: "vcvtn{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvtm{neon_type[1].no}_{neon_type[0]}" + links: + - link: 
"llvm.aarch64.neon.fcvtmu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtm{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding toward minus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", s_u32_f32] + - ["f64", "u64", d_u64_f64] + compose: + - LLVMLink: + name: "vcvtm{type[2]}" + links: + - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding toward plus infinity" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvtp{neon_type[1].no}_{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvtp{type[2]}" + doc: "Floating-point convert to unsigned integer, rounding toward plus infinity" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", s_u32_f32, 'i32'] + - ["f64", "u64", d_u64_f64, 'u64'] + compose: + - LLVMLink: + name: "vcvtp{type[2]}" + arguments: + - "a: {type[0]}" + return_type: "{type[1]}" + links: + - link: "llvm.aarch64.neon.fcvtpu.{type[3]}.{type[0]}" + arch: aarch64,arm64ec + + - name: "vdup{neon_type.laneq_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - poly64x2_t + - float64x2_t + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: [simd_shuffle!, [a, a, '[N as u32, N as u32]']] + + - name: "vdup{neon_type[1].lane_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [poly64x1_t, poly64x2_t] + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert!, ['N == 0']] + - FnCall: [simd_shuffle!, [a, a, '[N as u32, N as u32]']] + + - name: "vdup{neon_type.lane_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - poly64x1_t + - 
float64x1_t + compose: + - FnCall: [static_assert!, ['N == 0']] + - Identifier: [a, Symbol] + + - name: "vdupd{neon_type[0].lane_nox}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int64x1_t, "i64"] + - [uint64x1_t, "u64"] + - [float64x1_t, "f64"] + compose: + - FnCall: [static_assert!, ['N == 0']] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup_laneq_{neon_type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [poly64x2_t, poly64x1_t, 'u64'] + - [float64x2_t, float64x1_t, 'f64'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: + - "transmute::<{type[2]}, _>" + - - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x2_t, "i32", s_lane_s32] + - [int64x2_t, "i64", d_laneq_s64] + - [uint32x2_t, "u32", s_lane_u32] + - [uint64x2_t, "u64", d_laneq_u64] + - [float32x2_t, "f32", s_lane_f32] + - [float64x2_t, "f64", d_laneq_f64] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, "i8", b_lane_s8] + - [int16x8_t, "i16", h_laneq_s16] + - [uint8x8_t, "u8", b_lane_u8] + - [uint16x8_t, "u16", h_laneq_u16] + - [poly8x8_t, "p8", b_lane_p8] + - [poly16x8_t, "p16", h_laneq_p16] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 3]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x16_t, "i8", b_laneq_s8] + - [uint8x16_t, "u8", b_laneq_u8] + - [poly8x16_t, "p8", b_laneq_p8] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 4]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vdup{type[2]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + 
- FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16", h_lane_s16] + - [int32x4_t, "i32", s_laneq_s32] + - [uint16x4_t, "u16", h_lane_u16] + - [uint32x4_t, "u32", s_laneq_u32] + - [poly16x4_t, "p16", h_lane_p16] + - [float32x4_t, "f32", s_laneq_f32] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 2]] + - FnCall: [simd_extract!, [a, 'N as u32']] + + - name: "vext{neon_type[0].no}" + doc: "Extract vector from pair of vectors" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [poly64x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] + - [float64x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }'] + compose: + - Identifier: ["{type[1]}", Symbol] + + - name: "vmla{neon_type.no}" + doc: "Floating-point multiply-add to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_add, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vmlal_high_{neon_type[1]}" + doc: "Signed multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]'] + - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]', '[2, 3]'] + compose: + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]}] + - FnCall: ["vmlal_{neon_type[2]}", [a, b, c]] + + - name: "vmlal_high_{neon_type[1]}" + doc: "Unsigned multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + compose: + - Let: + - b + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - Let: + - c + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vmlal_{neon_type[1]}", [a, b, c]] + + - name: "vmlsl_high_{neon_type[1]}" + doc: "Signed multiply-subtract 
long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]'] + - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]'] + compose: + - Let: + - b + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - Let: + - c + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]] + + - name: "vmlsl_high_{neon_type[1]}" + doc: "Unsigned multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]'] + compose: + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]] + + - name: "vmovn_high{neon_type[1].noq}" + doc: Extract narrow + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: [*neon-stable] + assert_instr: [xtn2] + safety: + unsafe: [neon] + types: + - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - Let: + - c + - "{neon_type[0]}" + - FnCall: + - simd_cast + - - b + - FnCall: + - simd_shuffle! + - - a + - c + - "{type[3]}" + + - name: "vneg{neon_type.no}" + doc: Negate + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [neg] + safety: + unsafe: [neon] + types: + - int64x1_t + - int64x2_t + compose: + - FnCall: + - simd_neg + - - a + + - name: "vnegd_s64" + doc: Negate + arguments: ["a: {type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [neg] + safety: + unsafe: [neon] + types: + - i64 + compose: + - MethodCall: [a, wrapping_neg, []] + + - name: "vneg{neon_type.no}" + doc: Negate + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + assert_instr: [fneg] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: + - simd_neg + - - a + + - name: "vqneg{type[1]}" + doc: Signed saturating negate + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqneg] + safety: + unsafe: [neon] + types: + - [i8, 'b_s8', 's8'] + - [i16, 'h_s16', 's16'] + - [i32, 's_s32', 's32'] + - [i64, 'd_s64', 's64'] + compose: + - FnCall: + - 'simd_extract!' 
+ - - FnCall: + - 'vqneg_{type[2]}' + - - FnCall: ['vdup_n_{type[2]}', [a]] + - 0 + + - name: "vqneg{neon_type[0].no}" + doc: Signed saturating negate + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqneg] + safety: + unsafe: [neon] + types: + - [int64x1_t, 'i64'] + - [int64x2_t, 'i64'] + compose: + - LLVMLink: + name: "sqneg.{neon_type[0].no}" + links: + - link: "llvm.aarch64.neon.sqneg.v{neon_type[0].lane}{type[1]}" + arch: aarch64,arm64ec + + - name: "vqsub{type[1]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqsub] + safety: + unsafe: [neon] + types: + - [i32, 's_s32', 'i32'] + - [i64, 'd_s64', 'i64'] + compose: + - LLVMLink: + name: "sqsub.{type[0]}" + links: + - link: "llvm.aarch64.neon.sqsub.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqsub{type[1]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqsub] + safety: + unsafe: [neon] + types: + - [u32, 's_u32', 'i32'] + - [u64, 'd_u64', 'i64'] + compose: + - LLVMLink: + name: "uqsub.{type[0]}" + links: + - link: "llvm.aarch64.neon.uqsub.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqsub{type[3]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqsub] + safety: + unsafe: [neon] + types: + - [i8, int8x8_t, s8, 'b_s8'] + - [i16, int16x4_t, s16, 'h_s16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - b + - FnCall: + - 'simd_extract!' + - - FnCall: + - "vqsub_{type[2]}" + - - a + - b + - "0" + + - name: "vqsub{type[3]}" + doc: Saturating subtract + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqsub] + safety: + unsafe: [neon] + types: + - [u8, uint8x8_t, u8, 'b_u8'] + - [u16, uint16x4_t, u16, 'h_u16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[2]}" + - - b + - FnCall: + - 'simd_extract!' 
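+ # Note the width split in the vqsub entries: the i32/i64 and u32/u64 rows link
+ # llvm.aarch64.neon.{s,u}qsub directly, while the 8/16-bit rows are composed by
+ # a round-trip through a vector, e.g. (sketch, not verbatim output):
+ #   pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 {
+ #       simd_extract!(vqsub_u8(vdup_n_u8(a), vdup_n_u8(b)), 0)
+ #   }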
+ - - FnCall: + - "vqsub_{type[2]}" + - - a + - b + - "0" + + - name: "vrbit{neon_type.no}" + doc: Reverse bit order + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [rbit] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + compose: + - LLVMLink: + name: "rbit.{neon_type}" + links: + - link: "llvm.aarch64.neon.rbit.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrbit{neon_type[0].no}" + doc: Reverse bit order + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: [*neon-stable] + assert_instr: [rbit] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [poly8x8_t, int8x8_t] + - [poly8x16_t, int8x16_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vrbit{neon_type[1].no}" + - - FnCall: [transmute, [a]] + + - name: "vrndx{neon_type.no}" + doc: "Floating-point round to integral exact, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintx] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.rint.{neon_type}" + links: + - link: "llvm.rint.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnda{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to away" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frinta] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.round.{neon_type}" + links: + - link: "llvm.round.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndn{neon_type.no}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintn] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "frintn.{neon_type}" + links: + - link: "llvm.aarch64.neon.frintn.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndns_{type}" + doc: "Floating-point round to integral, to nearest with ties to even" + arguments: ["a: {type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [frintn] + safety: + unsafe: [neon] + types: + - f32 + compose: + - LLVMLink: + name: "roundeven.{type}" + links: + - link: "llvm.roundeven.{type}" + arch: aarch64,arm64ec + + - name: "vrndm{neon_type.no}" + doc: "Floating-point round to integral, toward minus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintm] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.floor.{neon_type}" + links: + - link: "llvm.floor.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndp{neon_type.no}" + doc: "Floating-point round to integral, toward plus infinity" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frintp] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.ceil.{neon_type}" + links: + - link: "llvm.ceil.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd{neon_type.no}" + doc: "Floating-point round to integral, toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + 
assert_instr: [frintz] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.trunc.{neon_type}" + links: + - link: "llvm.trunc.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrndi{neon_type.no}" + doc: "Floating-point round to integral, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [frinti] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.nearbyint.{neon_type}" + links: + - link: "llvm.nearbyint.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqadd{type[1]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqadd] + safety: + unsafe: [neon] + types: + - [u32, 's_u32', i32] + - [u64, 'd_u64', i64] + compose: + - LLVMLink: + name: "uqadd.{type[2]}" + links: + - link: "llvm.aarch64.neon.uqadd.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqadd{type[1]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqadd] + safety: + unsafe: [neon] + types: + - [i32, 's_s32', i32] + - [i64, 'd_s64', i64] + compose: + - LLVMLink: + name: "sqadd.{type[2]}" + links: + - link: "llvm.aarch64.neon.sqadd.{type[2]}" + arch: aarch64,arm64ec + + - name: "vqadd{type[2]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [sqadd] + safety: + unsafe: [neon] + types: + - [i8, int8x8_t, 'b_s8'] + - [i16, int16x4_t, 'h_s16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - b + - FnCall: + - simd_extract! + - - FnCall: + - "vqadd_{type[0]}" + - - a + - b + - "0" + + - name: "vqadd{type[2]}" + doc: Saturating add + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: [*neon-stable] + assert_instr: [uqadd] + safety: + unsafe: [neon] + types: + - [u8, uint8x8_t, 'b_u8'] + - [u16, uint16x4_t, 'h_u16'] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - a + - Let: + - b + - "{neon_type[1]}" + - FnCall: + - "vdup_n_{type[0]}" + - - b + - FnCall: + - simd_extract!
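+ # Usage sketch for the saturating scalar adds generated here: results clamp at
+ # the type bounds instead of wrapping, e.g.
+ #   assert_eq!(vqaddb_u8(250, 10), 255);
+ #   assert_eq!(vqaddb_s8(120, 100), 127);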
+ - - FnCall: + - "vqadd_{type[0]}" + - - a + - b + - "0" + + - name: "vld1{neon_type[1].no}" + doc: "Load multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld1] + safety: + unsafe: [neon] + types: + - ["*const f64", float64x1x2_t] + - ["*const f64", float64x2x2_t] + - ["*const f64", float64x1x3_t] + - ["*const f64", float64x2x3_t] + - ["*const f64", float64x1x4_t] + - ["*const f64", float64x2x4_t] + compose: + - LLVMLink: + name: "vld1{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f64" + arch: aarch64,arm64ec + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i8", int8x16x2_t, i8, int8x16_t, "4"] + - ["*const i64", int64x2x2_t, i64, int64x2_t, "1"] + - ["*const f64", float64x2x2_t, f64, float64x2_t, "1"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const i8" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0i8" + arch: aarch64,arm64ec + - FnCall: ["_vld2{neon_type[1].lane_nox}", ["b.0", "b.1", "LANE as i64", "a as _"]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64, int64x1_t] + - ["*const f64", float64x1x2_t, f64, float64x1_t] + compose: + - FnCall: ["static_assert!", ['LANE == 0']] + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const i8" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0i8" + arch: aarch64,arm64ec + - FnCall: ["_vld2{neon_type[1].lane_nox}", ["b.0", "b.1", "LANE as i64", "a as _"]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "static_assert!" 
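+ # The unsigned and poly lane-loads delegate to the signed forms via transmute;
+ # a rough rendering (hypothetical, assuming the trailing "::" in the template
+ # requests a turbofish on the callee):
+ #   pub unsafe fn vld2_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t {
+ #       static_assert!(LANE == 0);
+ #       transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+ #   }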
+ - - 'LANE == 0' + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "static_assert!" + - - 'LANE == 0' + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x16x2_t, int8x16x2_t, "4"] + - ["*const p8", poly8x16x2_t, int8x16x2_t, "4"] + - ["*const u64", uint64x2x2_t, int64x2x2_t, "1"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[3]}" + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x2_t, int64x2x2_t, "1"] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const f64", float64x2x2_t, f64, float64x2_t] + - ["*const i64", int64x2x2_t, i64, int64x2_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const f64", float64x1x2_t, f64, float64x1_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element 
structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-stable + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x2x2_t, i64] + - ["*const f64", float64x1x2_t, f64] + - ["*const f64", float64x2x2_t, f64] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x16x3_t, int8x16_t, i8, '4'] + - ['*const i64', int64x2x3_t, int64x2_t, i64, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld3lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + -
"const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const f64', float64x2x3_t, float64x2_t, f64, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld3lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const f64', float64x1x3_t, float64x1_t, f64] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'vld3.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to two registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x3_t, int64x1_t, i64] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'vld3.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p8', poly8x16x3_t, int8x16x3_t, '4'] + - ['*const u8', uint8x16x3_t, int8x16x3_t, '4'] + - ['*const u64', uint64x2x3_t, int64x2x3_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x3_t, int64x1x3_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - 
transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', 1]] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + safety: + unsafe: [neon] + assert_instr: [ld3] + types: + - ['*const i64', int64x2x3_t, '*const int64x2_t', i64] + - ['*const f64', float64x2x3_t, '*const float64x2_t', f64] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0v{neon_type[1].lane}{type[3]}' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + safety: + unsafe: [neon] + assert_instr: [nop] + types: + - ['*const f64', float64x1x3_t, '*const float64x1_t', f64] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0v{neon_type[1].lane}{type[3]}' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + safety: + unsafe: [neon] + assert_instr: [ld3] + types: + - ['*const u64', uint64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-stable + safety: + unsafe: [neon] + assert_instr: [ld3] + types: + - ['*const p64', poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element 
structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x2x3_t, i64] + - ["*const f64", float64x1x3_t, f64] + - ["*const f64", float64x2x3_t, f64] + compose: + - LLVMLink: + name: 'ld3r{neon_type[1].dup_nox}' + arguments: + - 'ptr: {type[0]}' + links: + - link: 'llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0{type[2]}' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as _']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld3{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-stable + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld3{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ['*const f64', float64x2x4_t, f64, '*const float64x2_t'] + - ['*const i64', int64x2x4_t, i64, '*const int64x2_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const f64', float64x1x4_t, f64, '*const float64x1_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - *neon-aes + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load 
multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x16x4_t, int8x16_t, i8, '3'] + - ['*const i64', int64x2x4_t, int64x2_t, i64, '1'] + - ['*const f64', float64x2x4_t, float64x2_t, f64, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x4_t, int64x1_t, i64] + - ['*const f64', float64x1x4_t, float64x1_t, f64] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p8', poly8x16x4_t, int8x16x4_t, '4'] + - ['*const u8', uint8x16x4_t, int8x16x4_t, '4'] + - ['*const u64', uint64x2x4_t, int64x2x4_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x2x4_t, int64x2x4_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - 
FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-stable
+    static_defs:
+      - "const LANE: i32"
+    safety:
+      unsafe: [neon]
+    types:
+      - ['*const u64', uint64x1x4_t, int64x1x4_t]
+    compose:
+      - FnCall: [static_assert!, ['LANE == 0']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vld4{neon_type[2].lane_nox}::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vld4{neon_type[1].lane_nox}"
+    doc: Load multiple 4-element structures to four registers
+    arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall:
+          - target_feature
+          - - 'enable = "neon,aes"'
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-stable
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['*const p64', poly64x1x4_t, int64x1x4_t]
+    compose:
+      - FnCall: [static_assert!, ['LANE == 0']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vld4{neon_type[2].lane_nox}::<LANE>'
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vst1{neon_type[1].lane_nox}"
+    doc: "Store multiple single-element structures from one, two, three, or four registers"
+    arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-stable
+    types:
+      - ['*mut f64', float64x1_t]
+    compose:
+      - FnCall: [static_assert!, ['LANE == 0']]
+      - Assign:
+          - "*a"
+          - FnCall: [simd_extract!, [b, 'LANE as u32']]
+      - Identifier: [';', Symbol]
+
+  - name: "vst1{neon_type[1].lane_nox}"
+    doc: "Store multiple single-element structures from one, two, three, or four registers"
+    arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-stable
+    types:
+      - ['*mut f64', float64x2_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
+      - Assign:
+          - "*a"
+          - FnCall: [simd_extract!, [b, 'LANE as u32']]
+      - Identifier: [';', Symbol]
+
+  - name: "vst2{neon_type[1].nox}"
+    doc: "Store multiple 2-element structures from two registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-stable
+    assert_instr: [st1]
+    types:
+      - ['f64', float64x1x2_t, float64x1_t]
+    compose:
+      - LLVMLink:
+          name: 'st2.{neon_type[1]}'
+          arguments:
+            - 'a: {type[2]}'
+            - 'b: {type[2]}'
+            - 'ptr: *mut i8'
+          links:
+            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0i8'
+              arch: aarch64,arm64ec
+      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+
+  - name: "vst2{neon_type[1].nox}"
+    doc: "Store multiple 2-element structures from two registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-stable
+    assert_instr: [st2]
+    types:
+      - [i64, int64x2x2_t, int64x2_t]
+      - [f64, float64x2x2_t, float64x2_t]
+    compose:
+      - LLVMLink:
+          name: 'st2.{neon_type[1]}'
+          arguments:
+            - 'a: {type[2]}'
+            - 'b: {type[2]}'
+            - 'ptr: *mut i8'
+          links:
+            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0i8'
+              arch: aarch64,arm64ec
+      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
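Note that the `vst1` lane entries above need no LLVM binding at all: `Assign` plus `simd_extract!` composes to a plain scalar extract-and-store. A sketch of the resulting wrapper for the `float64x1_t` entry (attributes elided):

```rust
pub unsafe fn vst1_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1_t) {
    static_assert!(LANE == 0); // a one-lane vector only has lane 0
    *a = simd_extract!(b, LANE as u32); // scalar store, hence assert_instr(nop)
}
```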
doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [i64, int64x1x2_t, int64x1_t] + - [f64, float64x1x2_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [i8, int8x16x2_t, int8x16_t, '4'] + - [i64, int64x2x2_t, int64x2_t, '1'] + - [f64, float64x2x2_t, float64x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [u8, uint8x16x2_t, int8x16x2_t, '4'] + - [u64, uint64x2x2_t, int64x2x2_t, '1'] + - [p8, poly8x16x2_t, int8x16x2_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [u64, uint64x1x2_t, int64x1x2_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [u64, uint64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - FnCall: [cfg_attr, 
[test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [p64, poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '1']] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2]]}]] + - *neon-stable + safety: + unsafe: [neon] + types: + - [p64, poly64x2x2_t, int64x2x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f64, float64x1x3_t, float64x1_t] + compose: + - LLVMLink: + name: 'st3.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x3_t, int64x2x3_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x3_t, int64x2x3_t] + compose: + 
- FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [i64, int64x2x3_t, int64x2_t] + - [f64, float64x2x3_t, float64x2_t] + compose: + - LLVMLink: + name: 'st3.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [u64, uint64x2x3_t, int64x2x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u64, uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u8, uint8x16x3_t, int8x16x3_t, '4'] + - [u64, uint64x2x3_t, int64x2x3_t, '1'] + - [p8, poly8x16x3_t, int8x16x3_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f64, float64x2x3_t, float64x2_t, '1'] + - [i8, int8x16x3_t, int8x16_t, '4'] + - [i64, int64x2x3_t, int64x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - LLVMLink: + name: 'st3lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 
'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i64, int64x1x3_t, int64x1_t, '1'] + - [f64, float64x1x3_t, float64x1_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'st3lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [f64, float64x1x4_t, float64x1_t] + compose: + - LLVMLink: + name: 'st4.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - *neon-aes + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [p64, poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [i64, int64x2x4_t, int64x2_t] + - [f64, float64x2x4_t, float64x2_t] + compose: + - LLVMLink: + name: 'st4.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: 
"vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*neon-stable] + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [u64, uint64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - "vst4{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u64, uint64x1x4_t, int64x1x4_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-stable + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u8, uint8x16x4_t, int8x16x4_t, '4'] + - [u64, uint64x2x4_t, int64x2x4_t, '1'] + - [p8, poly8x16x4_t, int8x16x4_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst4{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [f64, float64x2x4_t, float64x2_t, '1'] + - [i8, int8x16x4_t, int8x16_t, '4'] + - [i64, int64x2x4_t, int64x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - LLVMLink: + name: 'st4lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i64, int64x1x4_t, int64x1_t, '1'] + - [f64, float64x1x4_t, float64x1_t, '1'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - LLVMLink: + name: 'st4lane.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vusdot{neon_type[0].laneq_nox}" + doc: 
"Dot product index form with unsigned and signed integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]'] + - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '2']] + - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] + - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]] + + - name: "vsudot{neon_type[0].laneq_nox}" + doc: "Dot product index form with signed and unsigned integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t] + - [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 2]] + - Let: + - c + - uint32x4_t + - FnCall: [transmute, [c]] + - Let: + - c + - "{type[4]}" + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + + - name: "vmul{neon_type.no}" + doc: Multiply + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmul] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_mul, [a, b]] + + - name: "vmull_high{neon_type[0].noq}" + doc: Signed multiply long + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[3]}" + attr: [*neon-stable] + assert_instr: [smull2] + safety: + unsafe: [neon] + types: + - [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t] + - [int16x8_t, int16x4_t, '[4, 5, 6, 7]', int32x4_t] + - [int32x4_t, int32x2_t, '[2, 3]', int64x2_t] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - b + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: ["vmull_{neon_type[0]}", [a, b]] + + - name: "vmull_high{neon_type[0].noq}" + doc: "Unsigned multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[3]}" + attr: [*neon-stable] + assert_instr: [umull2] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t] + - [uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', uint32x4_t] + - [uint32x4_t, uint32x2_t, '[2, 3]', uint64x2_t] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - b + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: ["vmull_{neon_type[0]}", [a, b]] + + - name: "vmull_p64" + doc: "Polynomial multiply long" + arguments: ["a: {type[0]}", "b: 
{type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-stable + safety: + unsafe: [neon] + assert_instr: [pmull] + types: + - ["p64", "p128"] + compose: + - LLVMLink: + name: "pmull.{type[0]}" + return_type: "int8x16_t" + links: + - link: "llvm.aarch64.neon.pmull64" + arch: aarch64,arm64ec + - FnCall: [transmute, [{FnCall: ["_vmull_p64", [a, b]]}]] + + - name: "vmull_high{neon_type[0].noq}" + doc: "Polynomial multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[3]}" + attr: + - *neon-stable + safety: + unsafe: [neon] + assert_instr: [pmull] + types: + - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t] + compose: + - Let: + - a + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - b + - "{neon_type[1]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - FnCall: ["vmull_{neon_type[0]}", [a, b]] + + - name: "vmull_high{neon_type[0].noq}" + doc: "Polynomial multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-stable + safety: + unsafe: [neon] + assert_instr: [pmull] + types: + - [poly64x2_t, "p128"] + compose: + - FnCall: + - "vmull_{neon_type[0]}" + - - FnCall: [simd_extract!, [a, '1']] + - FnCall: [simd_extract!, [b, '1']] + + - name: "vmulx{neon_type.no}" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmulx] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmulx.{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fmulx.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fmulx] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32"] + - ["d_f64", "f64"] + compose: + - LLVMLink: + name: "fmulx.{type[1]}" + links: + - link: "llvm.aarch64.neon.fmulx.{type[1]}" + arch: aarch64,arm64ec + + - name: "vmulx_lane_f64" + doc: Floating-point multiply extended + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - vmulx_f64 + - - a + - FnCall: + - 'transmute::' + - - FnCall: + - "simd_extract!" + - - b + - 'LANE as u32' + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vmulx{type[3]}" + - - a + - FnCall: + - "simd_shuffle!" 
+ - - b + - b + - "{type[4]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ["d_lane_f64", "f64", float64x1_t, "d_f64", 'LANE as u32'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - "vmulx{type[3]}" + - - a + - FnCall: + - "simd_extract!" + - - b + - "{type[4]}" + + - name: "vmulx_laneq_f64" + doc: Floating-point multiply extended + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '1']] + - FnCall: + - vmulx_f64 + - - a + - FnCall: + - 'transmute::' + - - FnCall: + - "simd_extract!" + - - b + - 'LANE as u32' + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32, LANE as u32]'] + - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32, LANE as u32]'] + - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_shuffle!" + - - b + - b + - "{type[5]}" + + - name: "vmulx{type[0]}" + doc: Floating-point multiply extended + arguments: ["a: {type[1]}", "b: {neon_type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['s_lane_f32', f32, float32x2_t, '1', 's_f32', 'LANE as u32'] + - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32', 'LANE as u32'] + - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64', 'LANE as u32'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - FnCall: + - "vmulx{type[4]}" + - - a + - FnCall: + - "simd_extract!" 
+          - - b
+            - "{type[5]}"
+
+  - name: "vfma{neon_type.no}"
+    doc: Floating-point fused Multiply-Add to accumulator (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: [fmadd]
+    safety:
+      unsafe: [neon]
+    types:
+      - float64x1_t
+    compose:
+      - LLVMLink:
+          name: "_vfma{neon_type.no}"
+          arguments: ["a: {type}", "b: {type}", "c: {type}"]
+          links:
+            - link: "llvm.fma.{neon_type}"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vfma{neon_type.no}", [b, c, a]]
+
+  - name: "vfma{neon_type.no}"
+    doc: Floating-point fused Multiply-Add to accumulator (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: [fmla]
+    safety:
+      unsafe: [neon]
+    types:
+      - float64x2_t
+    compose:
+      - LLVMLink:
+          name: "_vfma{neon_type.no}"
+          arguments: ["a: {type}", "b: {type}", "c: {type}"]
+          links:
+            - link: "llvm.fma.{neon_type}"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vfma{neon_type.no}", [b, c, a]]
+
+  - name: "vfma_n_f64"
+    doc: Floating-point fused Multiply-Add to accumulator (vector)
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-stable
+    assert_instr: [fmadd]
+    safety:
+      unsafe: [neon]
+    types:
+      - [float64x1_t, f64]
+    compose:
+      - FnCall:
+          - "vfma_f64"
+          - - a
+            - b
+            - FnCall:
+                - "vdup_n_f64"
+                - - c
+
+  - name: "vfmaq_n_f64"
+    doc: Floating-point fused Multiply-Add to accumulator (vector)
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall:
+          - stable
+          - - 'feature = "neon_intrinsics"'
+            - 'since = "1.59.0"'
+    assert_instr: [fmla]
+    safety:
+      unsafe: [neon]
+    types:
+      - [float64x2_t, f64]
+    compose:
+      - FnCall:
+          - "vfmaq_f64"
+          - - a
+            - b
+            - FnCall:
+                - "vdupq_n_f64"
+                - - c
+
+  - name: "vdiv{neon_type.no}"
+    doc: "Divide"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr: [*neon-stable]
+    assert_instr: [fdiv]
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+      - float64x1_t
+      - float64x2_t
+    compose:
+      - FnCall: [simd_div, [a, b]]
+
+  - name: "vsub{neon_type.no}"
+    doc: "Subtract"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr: [*neon-stable]
+    assert_instr: [fsub]
+    safety:
+      unsafe: [neon]
+    types:
+      - float64x1_t
+      - float64x2_t
+    compose:
+      - FnCall: [simd_sub, [a, b]]
+
+  - name: "vsub{type[0]}"
+    doc: "Subtract"
+    arguments: ["a: {type[1]}", "b: {type[1]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [nop]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['d_s64', 'i64']
+      - ['d_u64', 'u64']
+    compose:
+      - MethodCall: [a, wrapping_sub, [b]]
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: Floating-point add across vector
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall:
+          - stable
+          - - 'feature = "neon_intrinsics"'
+            - 'since = "1.59.0"'
+    assert_instr: [faddp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x2_t, f32]
+      - [float32x4_t, f32]
+      - [float64x2_t, f64]
+    compose:
+      - LLVMLink:
+          name: "faddv.{type[1]}.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.faddv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
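Note the operand order in the `vfma` entries above: the public signature is `(a, b, c)` but the composed call is `_vfma(b, c, a)`, because `llvm.fma(x, y, z)` computes `x * y + z`. The intrinsic therefore returns `a + b * c`, with `a` as the accumulator. A minimal usage sketch (the function name here is illustrative, not part of the API):

```rust
use core::arch::aarch64::*;

// Lane-wise fused a + b * c, rounded once.
unsafe fn fused_madd(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
    vfmaq_f64(a, b, c)
}
```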
+
+  - name: "vaddlv{neon_type[0].no}"
+    doc: Signed Add Long across Vector
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [saddlv]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, i32]
+      - [int16x8_t, i32]
+      - [int32x4_t, i64]
+    compose:
+      - LLVMLink:
+          name: "llvm.aarch64.neon.saddlv.{type[1]}.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.saddlv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddlv{neon_type.no}"
+    doc: Signed Add Long across Vector
+    arguments: ["a: {neon_type}"]
+    return_type: "i64"
+    attr: [*neon-stable]
+    assert_instr: [saddlp]
+    safety:
+      unsafe: [neon]
+    types:
+      - int32x2_t
+    compose:
+      - LLVMLink:
+          name: "llvm.aarch64.neon.saddlv.i64.v2i32"
+          links:
+            - link: "llvm.aarch64.neon.saddlv.i64.v2i32"
+              arch: aarch64,arm64ec
+
+  - name: "vaddlv{neon_type[0].no}"
+    doc: Unsigned Add Long across Vector
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [uaddlv]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x4_t, u32, i32]
+      - [uint16x8_t, u32, i32]
+      - [uint32x4_t, u64, i64]
+    compose:
+      - LLVMLink:
+          name: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddlv{neon_type[0].no}"
+    doc: Unsigned Add Long across Vector
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [uaddlp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x2_t, u64, i64]
+    compose:
+      - LLVMLink:
+          name: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vsubw_high{neon_type[1].noq}"
+    doc: Signed Subtract Wide
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr: [*neon-stable]
+    assert_instr: [ssubw]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
+      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
+    compose:
+      - Let:
+          - c
+          - "{neon_type[2]}"
+          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+      - FnCall:
+          - simd_sub
+          - - a
+            - FnCall: [simd_cast, [c]]
+
+  - name: "vsubw_high{neon_type[1].noq}"
+    doc: Unsigned Subtract Wide
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr: [*neon-stable]
+    assert_instr: [usubw]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+    compose:
+      - Let:
+          - c
+          - "{neon_type[2]}"
+          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+      - FnCall:
+          - simd_sub
+          - - a
+            - FnCall: [simd_cast, [c]]
+
+  - name: "vsubl_high{neon_type[0].noq}"
+    doc: "Signed Subtract Long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [ssubl]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
+      - [int16x8_t, int32x4_t, '[4, 5, 6, 7]', int16x4_t]
+      - [int32x4_t, int64x2_t, '[2, 3]', int32x2_t]
+    compose:
+      - Let:
+          - c
+          - "{neon_type[3]}"
+          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
+      - Let:
+          - d
+          - "{neon_type[1]}"
+          - FnCall: [simd_cast, [c]]
+      - Let:
+          - e
+          - "{neon_type[3]}"
+          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let:
+          - f
+          - "{neon_type[1]}"
+          - FnCall: [simd_cast, [e]]
+      - FnCall: [simd_sub, [d, f]]
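All of the `_high` widening entries follow the same recipe: `simd_shuffle!` selects the upper half of each 128-bit input and `simd_cast` widens it before the arithmetic, so for the signed variants the result is exact. A minimal usage sketch, assuming an aarch64 target (`sub_long_high` is an illustrative name):

```rust
use core::arch::aarch64::*;

// Subtracts the upper four i16 lanes of b from those of a, widening to i32
// first; every i16 difference fits in i32, so nothing can overflow.
unsafe fn sub_long_high(a: int16x8_t, b: int16x8_t) -> int32x4_t {
    vsubl_high_s16(a, b)
}
```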
{neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [usubl] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t] + - [uint16x8_t, uint32x4_t, '[4, 5, 6, 7]', uint16x4_t] + - [uint32x4_t, uint64x2_t, '[2, 3]', uint32x2_t] + compose: + - Let: + - c + - "{neon_type[3]}" + - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]] + - Let: + - d + - "{neon_type[1]}" + - FnCall: [simd_cast, [c]] + - Let: + - e + - "{neon_type[3]}" + - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]] + - Let: + - f + - "{neon_type[1]}" + - FnCall: [simd_cast, [e]] + - FnCall: [simd_sub, [d, f]] + + - name: "vbcax{neon_type.no}" + doc: Bit clear and exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [bcax] + safety: + unsafe: [neon] + types: + - int8x16_t + - int16x8_t + - int32x4_t + - int64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.bcaxs.{neon_type}" + links: + - link: "llvm.aarch64.crypto.bcaxs.{neon_type}" + arch: aarch64,arm64ec + + - name: "vbcax{neon_type.no}" + doc: Bit clear and exclusive OR + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,sha3"']] + - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']] + assert_instr: [bcax] + safety: + unsafe: [neon] + types: + - uint8x16_t + - uint16x8_t + - uint32x4_t + - uint64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.crypto.bcaxu.{neon_type}" + links: + - link: "llvm.aarch64.crypto.bcaxu.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot270}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcadd{neon_type.rot90}" + doc: "Floating-point complex add" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcadd] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcadd.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.no}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot0.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot90}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: 
"{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot90.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type.rot270}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot270.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type[0].laneq_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot90_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]] + + - name: 
"vcmla{neon_type.rot180}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - *neon-unstable-fcma + assert_instr: [fcmla] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + links: + - link: "llvm.aarch64.neon.vcmla.rot180.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcmla{neon_type[0].rot180_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{type[3]}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_laneq}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, 1]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vcmla{neon_type[0].lane_nox}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]] + - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]] + + - name: "vcmla{neon_type[0].rot270_lane}" + doc: Floating-point complex multiply accumulate + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,fcma"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-fcma + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]'] + - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}] + - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]] + + - name: "vdot{neon_type[0].laneq_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] + safety: + unsafe: [neon] + types: + - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]'] + - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '2']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vdot{neon_type[0].laneq_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] + safety: + unsafe: [neon] + types: + - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '2']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vmax{neon_type.no}" + doc: Maximum (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: 
[*neon-stable] + assert_instr: [fmax] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmax.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmax.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmaxnm{neon_type.no}" + doc: Floating-point Maximum Number (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmaxnm] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmaxnm.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fmaxnmp] + safety: + unsafe: [neon] + types: + - [float32x2_t, f32] + - [float64x2_t, f64] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmaxnmv{neon_type[0].no}" + doc: Floating-point maximum number across vector + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: [*neon-stable] + assert_instr: [fmaxnmv] + safety: + unsafe: [neon] + types: + - [float32x4_t, f32] + compose: + - LLVMLink: + name: "fmaxnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vpmax{type[0]}" + doc: "Floating-point maximum pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: [*neon-stable] + assert_instr: [fmaxp] + safety: + unsafe: [neon] + types: + - ["s_f32", float32x2_t, f32] + - ["qd_f64", float64x2_t, f64] + compose: + - LLVMLink: + name: "fmaxv.{type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fmin] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fmin.{neon_type}" + links: + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + - name: "vminnm{neon_type.no}" + doc: "Floating-point Minimum Number (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: [*neon-stable] + assert_instr: [fminnm] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vminnmv{neon_type[0].no}" + doc: "Floating-point minimum number across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, "f32"] + - [float64x2_t, "f64"] + compose: + - LLVMLink: + name: "vminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vminnmv{neon_type[0].no}" + doc: "Floating-point minimum number across vector" + arguments: ["a: {neon_type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmv]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 
'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x4_t, "f32"] + compose: + - LLVMLink: + name: "vminnmv.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmovl_high{neon_type[0].noq}" + doc: Vector move + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [sxtl2] + safety: + unsafe: [neon] + types: + - [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]'] + - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]'] + compose: + - Let: + - a + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + - FnCall: ["vmovl{neon_type[0].noq}", [a]] + + - name: "vmovl_high{neon_type[0].noq}" + doc: Vector move + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: [*neon-stable] + assert_instr: [uxtl2] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]'] + compose: + - Let: + - a + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] + - FnCall: ["vmovl{neon_type[0].noq}", [a]] + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{type}" + attr: [*neon-stable] + assert_instr: [faddp] + safety: + unsafe: [neon] + types: + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{type[0]}" + doc: "Floating-point add pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: [*neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["s_f32", float32x2_t, f32] + - ["d_f64", float64x2_t, f64] + compose: + - Let: + - a1 + - "{type[2]}" + - FnCall: [simd_extract!, [a, '0']] + - Let: + - a2 + - "{type[2]}" + - FnCall: [simd_extract!, [a, '1']] + - Identifier: ['a1 + a2', Symbol] + + - name: "vpmin{type[0]}" + doc: Floating-point minimum pairwise + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: [*neon-stable] + assert_instr: [fminp] + safety: + unsafe: [neon] + types: + - ["s_f32", float32x2_t, f32] + - ["qd_f64", float64x2_t, f64] + compose: + - LLVMLink: + name: "fminv.{type[2]}.{neon_type[1]}" + links: + - link: "llvm.aarch64.neon.fminv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqdmullh_s16" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i16", "i32"] + compose: + - Let: [a, int16x4_t, {FnCall: [vdup_n_s16, [a]]}] + - Let: [b, int16x4_t, {FnCall: [vdup_n_s16, [b]]}] + - FnCall: [simd_extract!, [{FnCall: [vqdmull_s16, [a, b]]}, '0']] + + - name: "vqdmulls_s32" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i32", "i64"] + compose: + - LLVMLink: + name: "vqdmulls_s32" + links: + - link: 
"llvm.aarch64.neon.sqdmulls.scalar" + arch: aarch64,arm64ec + + - name: "vqdmull_high{neon_type[0].noq}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]'] + - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]'] + compose: + - Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}] + - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_high_n_{type[1]}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]'] + - [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]'] + compose: + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull{type[3]}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i16", int16x4_t, "i32", 'h_lane_s16', 'h_s16'] + - ["i32", int32x4_t, "i64", 's_laneq_s32', 's_s32'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 2]] + - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ["vqdmull{type[4]}", [a, b]] + + - name: "vqdmullh_laneq_s16" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, N = 4]]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i16", int16x8_t, "i32"] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 3]] + - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ["vqdmullh_s16", [a, b]] + + - name: "vqdmulls_lane_s32" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i32", int32x2_t, "i64"] + compose: + - FnCall: [static_assert_uimm_bits!, [N, 1]] + - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ["vqdmulls_s32", [a, b]] + + - name: "vqdmull{type[6]}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + 
return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16'] + - [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_high_lane_s32" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '1']] + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_high_laneq_s16" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}] + - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}] + - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]] + + - name: "vqdmull_laneq_s16" + doc: "Vector saturating doubling long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [vqdmull_s16, [a, b]] + + - name: "vqdmull_laneq_s32" + doc: "Vector saturating doubling long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const 
N: i32'] + safety: + unsafe: [neon] + types: + - [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [vqdmull_s32, [a, b]] + + - name: "vqdmlal{type[4]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, _high_s32] + - [int32x4_t, int16x8_t, "i16", int32x4_t, _high_n_s16] + - [int64x2_t, int32x4_t, "i32", int64x2_t, _high_n_s32] + compose: + - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}", [b, c]]}]] + + - name: "vqdmlal{type[4]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, int16x4_t, int32x4_t, _high_lane_s16, '2'] + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_laneq_s16, '3'] + - [int64x2_t, int32x4_t, int32x2_t, int64x2_t, _high_lane_s32, '1'] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, _high_laneq_s32, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[5]}"]] + - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}::", [b, c]]}]] + + - name: "vqdmlalh_{type[2]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i32", "i16", "s16"] + compose: + - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}] + - FnCall: [vqadds_s32, [a, {FnCall: [simd_extract!, [x, 0]]}]] + + - name: "vqdmlals_s32" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "i32", "i32", "i64"] + compose: + - Let: [x, i64, {FnCall: [vqaddd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}] + - Identifier: ['x as i64', Symbol] + + - name: "vqdmlal{type[4]}" + doc: "Signed saturating doubling multiply-add long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i32", "i16", int16x4_t, "i32", h_lane_s16, '2', h_s16] + - ["i32", "i16", int16x8_t, "i32", 
h_laneq_s16, '3', h_s16] + - ["i64", "i32", int32x2_t, "i64", s_lane_s32, '1', s_s32] + - ["i64", "i32", int32x4_t, "i64", s_laneq_s32, '2', s_s32] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqdmlal_laneq_s16" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x4_t, int16x8_t, int32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - FnCall: [vqaddq_s32, [a, {FnCall: ["vqdmull_laneq_s16::", [b, c]]}]] + + - name: "vqdmlal_laneq_s32" + doc: "Vector widening saturating doubling multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int64x2_t, int32x2_t, int32x4_t, int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - FnCall: [vqaddq_s64, [a, {FnCall: ["vqdmull_laneq_s32::", [b, c]]}]] + + - name: "vqdmlsl{type[4]}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, _high_s32] + - [int32x4_t, int16x8_t, "i16", int32x4_t, _high_n_s16] + - [int64x2_t, int32x4_t, "i32", int64x2_t, _high_n_s32] + compose: + - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}", [b, c]]}]] + + - name: "vqdmlsl{type[4]}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16', '2'] + - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, '_high_laneq_s16', '3'] + - [int64x2_t, int32x4_t, int32x2_t, int64x2_t, '_high_lane_s32', '1'] + - [int64x2_t, int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[5]}"]] + - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}::", [b, c]]}]] + + - name: "vqdmlslh_s16" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] + - FnCall: 
[stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i32", "i16"] + compose: + - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}] + - FnCall: [vqsubs_s32, [a, {FnCall: [simd_extract!, [x, '0']]}]] + + - name: "vqdmlsls_s32" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "i32", "i32", "i64"] + compose: + - Let: [x, i64, {FnCall: [vqsubd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}] + - Identifier: ['x as i64', Symbol] + + - name: "vqdmlsl{type[4]}" + doc: "Signed saturating doubling multiply-subtract long" + arguments: ["a: {type[0]}", "b: {type[1]}", "c: {neon_type[2]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i32", "i16", int16x4_t, "i32", 'h_lane_s16', '2', 'h_s16'] + - ["i32", "i16", int16x8_t, "i32", 'h_laneq_s16', '3', 'h_s16'] + - ["i64", "i32", int32x2_t, "i64", 's_lane_s32', '1', 's_s32'] + - ["i64", "i32", int32x4_t, "i64", 's_laneq_s32', '2', 's_s32'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]] + - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqdmlsl_laneq_s16" + doc: "Vector widening saturating doubling multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x4_t, int16x8_t, int32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '3']] + - FnCall: ["vqsubq_s32", [a, {FnCall: ["vqdmull_laneq_s16::", [b, c]]}]] + + - name: "vqdmlsl_laneq_s32" + doc: "Vector widening saturating doubling multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int64x2_t, int32x2_t, int32x4_t, int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, '2']] + - FnCall: [vqsubq_s64, [a, {FnCall: ["vqdmull_laneq_s32::", [b, c]]}]] + + - name: "vqdmulh{type[4]}" + doc: "Signed saturating doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i16", "i16", "i16", int16x4_t, 'h_s16'] + - ["i32", "i32", "i32", int32x2_t, 's_s32'] + compose: + - Let: [a, "{neon_type[3]}", 
{FnCall: ["vdup_n{neon_type[3].no}", [a]]}] + - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [b]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}, '0']] + + - name: "vqdmulhh{type[3]}" + doc: "Signed saturating doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i16", int16x4_t, "i16", '_lane_s16', '2'] + - ["i16", int16x8_t, "i16", '_laneq_s16', '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]] + - Let: [b, 'i16', {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ['vqdmulhh_s16', [a, b]] + + - name: "vqdmulhs{type[3]}" + doc: "Signed saturating doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i32", int32x2_t, "i32", "_lane_s32", '1'] + - ["i32", int32x4_t, "i32", "_laneq_s32", '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]] + - Let: [b, 'i32', {FnCall: [simd_extract!, [b, 'N as u32']]}] + - FnCall: ['vqdmulhs_s32', [a, b]] + + - name: "vqmovn_high{neon_type[1].noq}" + doc: "Signed saturating extract narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]'] + compose: + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]] + + - name: "vqmovn_high{neon_type[1].noq}" + doc: "Signed saturating extract narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]'] + compose: + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]] + + - name: "vqmovn{type[2]}" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i16", "i8", 'h_s16', s16] + - ["i32", "i16", 's_s32', s32] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + + - name: "vqmovn{type[2]}" + doc: "Saturating extract narrow" + 
arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["u16", "u8", 'h_u16', 'u16'] + - ["u32", "u16", 's_u32', 'u32'] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + + - name: "vqmovnd_s64" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "i32"] + compose: + - LLVMLink: + name: "vqmovnd_s64" + links: + - link: "llvm.aarch64.neon.scalar.sqxtn.i32.i64" + arch: aarch64,arm64ec + + - name: "vqmovnd_u64" + doc: "Saturating extract narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["u64", "u32"] + compose: + - LLVMLink: + name: "vqmovnd_u64" + links: + - link: "llvm.aarch64.neon.scalar.uqxtn.i32.i64" + arch: aarch64,arm64ec + + - name: "vqmovun{type[2]}" + doc: "Signed saturating extract unsigned narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i16", "u8", 'h_s16', s16] + - ["i32", "u16", 's_s32', s32] + - ["i64", "u32", 'd_s64', s64] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']] + + - name: "vqmovun_high_{neon_type[1]}" + doc: "Signed saturating extract unsigned narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, int32x4_t, uint16x8_t, s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, int64x2_t, uint32x4_t, s64, '[0, 1, 2, 3]'] + compose: + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovun_{type[3]}", [b]]}, "{type[4]}"]] + + - name: "vqrdmulh{type[1]}" + doc: "Signed saturating rounding doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i16", 'h_s16', 's16'] + - ["i32", 's_s32', 's32'] + compose: + - FnCall: [simd_extract!, [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}, '0']] + + - name: "vqrdmulh{type[2]}" + doc: "Signed saturating rounding doubling multiply returning high half" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh, LANE = 1]]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + 
static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i16", int16x4_t, 'h_lane_s16', 'h_s16', '2'] + - ["i16", int16x8_t, 'h_laneq_s16', 'h_s16', '3'] + - ["i32", int32x2_t, 's_lane_s32', 's_s32', '1'] + - ["i32", int32x4_t, 's_laneq_s32', 's_s32', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: [simd_extract!, [b, 'LANE as u32']]}]] + + - name: "vqrdmlah{neon_type.no}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: + unsafe: [neon] + types: + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vqrdmlah{neon_type.no}" + links: + - link: "llvm.aarch64.neon.sqrdmlah.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqrdmlah{type[3]}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: + unsafe: [neon] + types: + - ["i16", int16x4_t, s16, 'h_s16'] + - ["i32", int32x2_t, s32, 's_s32'] + compose: + - Let: [a, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [a]]}] + - Let: [b, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [b]]}] + - Let: [c, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [c]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}, '0']] + + - name: "vqrdmlah{type[0]}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {type[1]}", "b: {type[2]}", "c: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] + - FnCall: 
["vqrdmlah{neon_type[2].no}", [a, b, c]] + + - name: "vqrdmlah{type[4]}" + doc: "Signed saturating rounding doubling multiply accumulate returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlah, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i16", int16x4_t, '2', "h_s16", h_lane_s16, h_s16] + - ["i16", int16x8_t, '3', "h_s16", h_laneq_s16, h_s16] + - ["i32", int32x2_t, '1', "s_s32", s_lane_s32, s_s32] + - ["i32", int32x4_t, '2', "s_s32", s_laneq_s32, s_s32] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqrdmlsh{neon_type.no}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: + unsafe: [neon] + types: + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vqrdmlsh{neon_type.no}" + links: + - link: "llvm.aarch64.neon.sqrdmlsh.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqrdmlsh{type[1]}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh]]}]] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + safety: + unsafe: [neon] + types: + - ["i16", "h_s16", int16x4_t, s16] + - ["i32", "s_s32", int32x2_t, s32] + compose: + - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}] + - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}] + - Let: [c, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [c]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}, '0']] + + - name: "vqrdmlsh{type[0]}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[3]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh, LANE = 1]]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, 
int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] + - FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]] + + - name: "vqrdmlsh{type[3]}" + doc: "Signed saturating rounding doubling multiply subtract returning high half" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [target_feature, ['enable = "rdm"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmlsh, LANE = 1]]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "rdm_intrinsics"', 'since = "1.62.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i16", int16x4_t, '2', h_lane_s16, h_s16] + - ["i16", int16x8_t, '3', h_laneq_s16, h_s16] + - ["i32", int32x2_t, '1', s_lane_s32, s_s32] + - ["i32", int32x4_t, '2', s_laneq_s32, s_s32] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vqrshl{type[0]}" + doc: "Signed saturating rounding shift left" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['s_s32', "i32"] + - ['d_s64', "i64"] + compose: + - LLVMLink: + name: "vqrshl{type[0]}" + links: + - link: "llvm.aarch64.neon.sqrshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vqrshl{type[1]}" + doc: "Signed saturating rounding shift left" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i8", 'b_s8', int8x8_t, s8] + - ["i16", 'h_s16', int16x4_t, s16] + compose: + - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}] + - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[3]}", [a, b]]}, '0']] + + - name: "vqrshl{type[2]}" + doc: "Unsigned saturating rounding shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["u32", "i32", 's_u32'] + - ["u64", "i64", 'd_u64'] + compose: + - LLVMLink: + name: "vqrshl{type[2]}" + links: + - link: "llvm.aarch64.neon.uqrshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vqrshl{type[2]}" + doc: "Unsigned saturating rounding shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["u8", "i8", "b_u8", uint8x8_t, int8x8_t, s8] + - ["u16", "i16", "h_u16", uint16x4_t, 
int16x4_t, s16] + compose: + - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n_{type[0]}", [a]]}] + - Let: [b, "{neon_type[4]}", {FnCall: ["vdup_n_{type[5]}", [b]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[0]}", [a, b]]}, '0']] + + - name: "vqrshrn{type[2]}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - ["i16", "i8", 'h_n_s16', 'N >= 1 && N <= 8', int16x8_t, q_n_s16] + - ["i32", "i16", 's_n_s32', 'N >= 1 && N <= 16', int32x4_t, q_n_s32] + - ["i64", "i32", 'd_n_s64', 'N >= 1 && N <= 32', int64x2_t, q_n_s64] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshrn_n{neon_type[4].noq}::", [a]]}, '0']] + + - name: "vqrshrn{type[3]}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8'] + - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16'] + - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[5]}"]] + - FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::", [b]]}, "{type[4]}"]] + + - name: "vqrshrn{type[0]}" + doc: "Unsigned saturating rounded shift right narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [h_n_u16, u16, u8, 'N >= 1 && N <= 8', uint16x8_t, q_n_u16, _n_u16] + - [s_n_u32, u32, u16, 'N >= 1 && N <= 16', uint32x4_t, q_n_u32, _n_u32] + - [d_n_u64, u64, u32, 'N >= 1 && N <= 32', uint64x2_t, q_n_u64, _n_u64] + compose: + - FnCall: [static_assert!, ['{type[3]}']] + - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}] + - FnCall: [simd_extract!, [{FnCall: ["vqrshrn{type[6]}::", [a]]}, '0']] + + - name: "vqrshrn_high_n{neon_type[1].noq}" + doc: "Unsigned saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 
&& N <= 32', '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ['{type[3]}']] + - FnCall: + - simd_shuffle! + - - a + - FnCall: + - "vqrshrn_n{neon_type[1].noq}::" + - - b + - "{type[4]}" + + - name: "vqrshrun{type[0]}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [h_n_s16, "i16", "u8", 'N >= 1 && N <= 8', int16x8_t, s16] + - [s_n_s32, "i32", "u16", 'N >= 1 && N <= 16', int32x4_t, s32] + - [d_n_s64, "i64", "u32", 'N >= 1 && N <= 32', int64x2_t, s64] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - Let: + - a + - "{neon_type[4]}" + - FnCall: ["vdupq_n_{type[5]}", [a]] + - FnCall: + - simd_extract! + - - FnCall: + - "vqrshrun_n_{type[5]}::" + - - a + - '0' + + - name: "vqrshrun_high_n{neon_type[1].noq}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]'] + - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_shuffle! 
+ - - a + - FnCall: + - "vqrshrun_n_{type[4]}::" + - - b + - "{type[5]}" + + - name: "vqshld_{type}" + doc: "Signed saturating shift left" + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - i64 + compose: + - LLVMLink: + name: "vqshld{type}" + links: + - link: "llvm.aarch64.neon.sqshl.{type}" + arch: aarch64,arm64ec + + - name: "vqshl{type[0]}" + doc: "Signed saturating shift left" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [b_s8, "i8", int8x8_t] + - [h_s16, "i16", int16x4_t] + - [s_s32, "i32", int32x2_t] + compose: + - Let: + - c + - "{neon_type[2]}" + - FnCall: + - "vqshl{neon_type[2].noq}" + - - FnCall: ["vdup_n{neon_type[2].no}", [a]] + - FnCall: ["vdup_n{neon_type[2].no}", [b]] + - FnCall: [simd_extract!, [c, '0']] + + - name: "vqshl{type[0]}" + doc: "Signed saturating shift left" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [b_n_s8, "i8", "3", s8] + - [h_n_s16, "i16", "4", s16] + - [s_n_s32, "i32", "5", s32] + - [d_n_s64, "i64", "6", s64] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - FnCall: + - simd_extract! + - - FnCall: + - "vqshl_n_{type[3]}::" + - - FnCall: ["vdup_n_{type[3]}", [a]] + - '0' + + - name: "vqshld_{type[0]}" + doc: "Unsigned saturating shift left" + arguments: ["a: {type[0]}", "b: {type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["u64", "i64"] + compose: + - LLVMLink: + name: "vqshld{type[0]}" + links: + - link: "llvm.aarch64.neon.uqshl.{type[1]}" + arch: aarch64,arm64ec + + - name: "vqshl{type[0]}" + doc: "Unsigned saturating shift left" + arguments: ["a: {type[1]}", "b: {type[2]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [b_u8, "u8", "i8", uint8x8_t, int8x8_t] + - [h_u16, "u16", "i16", uint16x4_t, int16x4_t] + - [s_u32, "u32", "i32", uint32x2_t, int32x2_t] + compose: + - Let: + - c + - "{neon_type[3]}" + - FnCall: + - "vqshl{neon_type[3].noq}" + - - FnCall: ["vdup{neon_type[3].N}", [a]] + - FnCall: ["vdup{neon_type[4].N}", [b]] + - FnCall: [simd_extract!, [c, '0']] + + - name: "vqshl{type[0]}" + doc: "Unsigned saturating shift left" + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [b_n_u8, "u8", '3'] + - [h_n_u16, "u16", '4'] + - [s_n_u32, "u32", '5'] + - [d_n_u64, "u64", '6'] + compose: + - FnCall: 
+  - name: "vqshrnd_n_s64"
+    doc: "Signed saturating shift right narrow"
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - ["i64", "i32"]
+    compose:
+      - FnCall: [static_assert!, ['N >= 1 && N <= 32']]
+      - LLVMLink:
+          name: "vqshrnd{type[1]}"
+          arguments:
+            - "a: {type[0]}"
+            - "n: i32"
+          links:
+            - link: "llvm.aarch64.neon.sqshrn.{type[1]}"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vqshrnd_n_s64", [a, N]]
+
+  - name: "vqshrn{type[0]}"
+    doc: "Signed saturating shift right narrow"
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [h_n_s16, "i16", "i8", 'N >= 1 && N <= 8', s16]
+      - [s_n_s32, "i32", "i16", 'N >= 1 && N <= 16', s32]
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - FnCall:
+        - simd_extract!
+        - - FnCall:
+            - "vqshrn_n_{type[4]}::<N>"
+            - - FnCall: ["vdupq_n_{type[4]}", [a]]
+          - '0'
+
+  - name: "vqshrn{type[0]}"
+    doc: "Signed saturating shift right narrow"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16]
+      - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32]
+      - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64]
+    compose:
+      - FnCall: [static_assert!, ["{type[4]}"]]
+      - FnCall:
+        - simd_shuffle!
+        - - a
+          - FnCall: ["vqshrn_n_{type[6]}::<N>", [b]]
+          - "{type[5]}"
+
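Reviewer note: the `_high_n` entries narrow `b` with the existing 64-bit intrinsic and then concatenate onto `a` with a single shuffle. For `vqshrn_high_n_s16` the compose above comes out roughly as (sketch):

    pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
        static_assert!(N >= 1 && N <= 8);
        // Low half is `a` (indices 0..8), high half is the narrowed `b` (8..16).
        simd_shuffle!(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
    }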
+  - name: "vqshrnd_n_u64"
+    doc: "Unsigned saturating shift right narrow"
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - ["u64", "u32"]
+    compose:
+      - FnCall: [static_assert!, ['N >= 1 && N <= 32']]
+      - LLVMLink:
+          name: "vqshrnd_n_u64"
+          arguments:
+            - "a: u64"
+            - "n: i32"
+          links:
+            - link: "llvm.aarch64.neon.uqshrn.i32"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vqshrnd_n_u64", ["a.as_signed()", N]]
+
+  - name: "vqshrn{type[0]}"
+    doc: "Unsigned saturating shift right narrow"
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - ['h_n_u16', "u16", "u8", 'N >= 1 && N <= 8']
+      - ['s_n_u32', "u32", "u16", 'N >= 1 && N <= 16']
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - FnCall:
+        - "simd_extract!"
+        - - FnCall:
+            - "vqshrn_n_{type[1]}::<N>"
+            - - FnCall: ["vdupq_n_{type[1]}", [a]]
+          - '0'
+
+  - name: "vqshrn{type[0]}"
+    doc: "Unsigned saturating shift right narrow"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+    compose:
+      - FnCall: [static_assert!, ["{type[4]}"]]
+      - FnCall:
+        - simd_shuffle!
+        - - a
+          - FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
+          - "{type[5]}"
+
+  - name: "vqshrun{type[0]}"
+    doc: "Signed saturating shift right unsigned narrow"
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [h_n_s16, "i16", "u8", 'N >= 1 && N <= 8', s16]
+      - [s_n_s32, "i32", "u16", 'N >= 1 && N <= 16', s32]
+      - [d_n_s64, "i64", "u32", 'N >= 1 && N <= 32', s64]
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - FnCall:
+        - simd_extract!
+        - - FnCall:
+            - "vqshrun_n_{type[4]}::<N>"
+            - - FnCall: ["vdupq_n_{type[4]}", [a]]
+          - '0'
+
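Reviewer note: an `LLVMLink` node like the one in `vqshrnd_n_u64` becomes an extern binding plus a call, with the unsigned input routed through the signed-typed LLVM declaration via `as_signed()`. Approximately (a sketch, mirroring the pre-gen2 output style shown at the top of this diff):

    #[allow(improper_ctypes)]
    extern "unadjusted" {
        #[cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), link_name = "llvm.aarch64.neon.uqshrn.i32")]
        fn _vqshrnd_n_u64(a: i64, n: i32) -> i32;
    }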
+  - name: "vqshrun_high_n_{neon_type[1]}"
+    doc: "Signed saturating shift right unsigned narrow"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - FnCall:
+        - simd_shuffle!
+        - - a
+          - FnCall: ["vqshrun_n_{neon_type[1]}::<N>", [b]]
+          - "{type[4]}"
+
+  - name: "vsqadd{type[0]}"
+    doc: "Unsigned saturating accumulate of signed value"
+    arguments: ["a: {type[1]}", "b: {type[2]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [b_u8, "u8", "i8", s8]
+      - [h_u16, "u16", "i16", s16]
+    compose:
+      - FnCall:
+        - simd_extract!
+        - - FnCall:
+            - "vsqadd_{type[1]}"
+            - - FnCall: ["vdup_n_{type[1]}", [a]]
+              - FnCall: ["vdup_n_{type[2]}", [b]]
+          - '0'
+
+  - name: "vsqadd{type[0]}"
+    doc: "Unsigned saturating accumulate of signed value"
+    arguments: ["a: {type[1]}", "b: {type[2]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [s_u32, "u32", "i32"]
+      - [d_u64, "u64", "i64"]
+    compose:
+      - LLVMLink:
+          name: "vsqadd{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.usqadd.{type[2]}"
+              arch: aarch64,arm64ec
+
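Reviewer note: `usqadd` adds a signed addend into an unsigned accumulator with unsigned saturation, which is why the argument types are mixed. Usage (aarch64 only):

    use core::arch::aarch64::vsqaddb_u8;
    assert_eq!(unsafe { vsqaddb_u8(200, 100) }, 255); // 300 saturates to u8::MAX
    assert_eq!(unsafe { vsqaddb_u8(200, -100) }, 100); // a negative addend subtracts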
+  - name: "vsqrt{neon_type.no}"
+    doc: "Calculates the square root of each lane."
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+      - float64x1_t
+      - float64x2_t
+    compose:
+      - FnCall: [simd_fsqrt, [a]]
+
+  - name: "vrsqrts{type[0]}"
+    doc: "Floating-point reciprocal square root step"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [_f64, float64x1_t, v1f64]
+      - [q_f64, float64x2_t, v2f64]
+    compose:
+      - LLVMLink:
+          name: "vrsqrts{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frsqrts.{type[2]}"
+              arch: aarch64,arm64ec
+
+  - name: "vrsqrts{type[0]}"
+    doc: "Floating-point reciprocal square root step"
+    arguments: ["a: {type[1]}", "b: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [s_f32, "f32"]
+      - [d_f64, "f64"]
+    compose:
+      - LLVMLink:
+          name: "vrsqrts{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frsqrts.{type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vrecpe{type[0]}"
+    doc: "Reciprocal estimate."
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [_f64, float64x1_t, v1f64]
+      - [q_f64, float64x2_t, v2f64]
+    compose:
+      - LLVMLink:
+          name: "vrecpe{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frecpe.{type[2]}"
+              arch: aarch64,arm64ec
+
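Reviewer note: `frsqrts` computes the Newton-Raphson step (3 - a*b)/2, so it pairs with the `frsqrte` estimate to refine 1/sqrt(x). A minimal scalar sketch:

    use core::arch::aarch64::{vrsqrtes_f32, vrsqrtss_f32};
    unsafe fn rsqrt(x: f32) -> f32 {
        let mut e = vrsqrtes_f32(x); // rough estimate of 1/sqrt(x)
        e *= vrsqrtss_f32(x * e, e); // e *= (3 - x*e*e)/2
        e *= vrsqrtss_f32(x * e, e); // one more step for extra precision
        e
    }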
+  - name: "vrecpe{type[0]}"
+    doc: "Reciprocal estimate."
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [s_f32, "f32"]
+      - [d_f64, "f64"]
+    compose:
+      - LLVMLink:
+          name: "vrecpe{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frecpe.{type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vrecps{type[0]}"
+    doc: "Floating-point reciprocal step"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [_f64, float64x1_t, v1f64]
+      - [q_f64, float64x2_t, v2f64]
+    compose:
+      - LLVMLink:
+          name: "vrecps{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frecps.{type[2]}"
+              arch: aarch64,arm64ec
+
+  - name: "vrecps{type[0]}"
+    doc: "Floating-point reciprocal step"
+    arguments: ["a: {type[1]}", "b: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [s_f32, "f32"]
+      - [d_f64, "f64"]
+    compose:
+      - LLVMLink:
+          name: "vrecps{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frecps.{type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vrecpx{type[0]}"
+    doc: "Floating-point reciprocal exponent"
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [s_f32, "f32"]
+      - [d_f64, "f64"]
+    compose:
+      - LLVMLink:
+          name: "vrecpxs{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.frecpx.{type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [nop]
+    safety:
+      unsafe: [neon]
+    types:
+      - [poly64x1_t, int64x1_t]
+      - [poly64x1_t, uint64x1_t]
+      - [int64x1_t, poly64x1_t]
+      - [uint64x1_t, poly64x1_t]
+      - [poly64x2_t, int64x2_t]
+      - [poly64x2_t, uint64x2_t]
+      - [int64x2_t, poly64x2_t]
+      - [uint64x2_t, poly64x2_t]
+      - [float64x1_t, int8x8_t]
+      - [float64x1_t, int16x4_t]
+      - [float64x1_t, int32x2_t]
+      - [float64x1_t, int64x1_t]
+      - [float64x2_t, int8x16_t]
+      - [float64x2_t, int16x8_t]
+      - [float64x2_t, int32x4_t]
+      - [float64x2_t, int64x2_t]
+      - [float64x1_t, uint8x8_t]
+      - [float64x1_t, uint16x4_t]
+      - [float64x1_t, uint32x2_t]
+      - [float64x1_t, uint64x1_t]
+      - [float64x2_t, uint8x16_t]
+      - [float64x2_t, uint16x8_t]
+      - [float64x2_t, uint32x4_t]
+      - [float64x2_t, uint64x2_t]
+      - [float64x1_t, poly8x8_t]
+      - [float64x1_t, poly16x4_t]
+      - [float32x2_t, poly64x1_t]
+      - [float64x1_t, poly64x1_t]
+      - [float64x2_t, poly8x16_t]
+      - [float64x2_t, poly16x8_t]
+      - [float32x4_t, poly64x2_t]
+      - [float64x2_t, poly64x2_t]
+      - [float64x2_t, p128]
+      - [int8x8_t, float64x1_t]
+      - [int16x4_t, float64x1_t]
+      - [int32x2_t, float64x1_t]
+      - [int64x1_t, float64x1_t]
+      - [int8x16_t, float64x2_t]
+      - [int16x8_t, float64x2_t]
+      - [int32x4_t, float64x2_t]
+      - [int64x2_t, float64x2_t]
+      - [poly8x8_t, float64x1_t]
+      - [uint16x4_t, float64x1_t]
+      - [uint32x2_t, float64x1_t]
+      - [uint64x1_t, float64x1_t]
+      - [poly8x16_t, float64x2_t]
+      - [uint16x8_t, float64x2_t]
+      - [uint32x4_t, float64x2_t]
+      - [uint64x2_t, float64x2_t]
+      - [uint8x8_t, float64x1_t]
+      - [poly16x4_t, float64x1_t]
+      - [poly64x1_t, float64x1_t]
+      - [poly64x1_t, float32x2_t]
+      - [uint8x16_t, float64x2_t]
+      - [poly16x8_t, float64x2_t]
+      - [poly64x2_t, float64x2_t]
+      - [poly64x2_t, float32x4_t]
+      - [p128, float64x2_t]
+      - [float32x2_t, float64x1_t]
+      - [float64x1_t, float32x2_t]
+      - [float32x4_t, float64x2_t]
+      - [float64x2_t, float32x4_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
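Reviewer note: every `vreinterpret` pair above is a pure bit cast (`transmute`, asserted to compile to `nop`), and only same-size combinations are listed. Usage:

    use core::arch::aarch64::{vdupq_n_u64, vreinterpretq_f64_u64};
    // View the bit pattern 0x3FF0_0000_0000_0000 (1.0f64) as a float64x2_t, for free.
    let ones = unsafe { vreinterpretq_f64_u64(vdupq_n_u64(0x3FF0_0000_0000_0000)) };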
+  - name: "vrshld_s64"
+    doc: "Signed rounding shift left"
+    arguments: ["a: {type}", "b: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - "i64"
+    compose:
+      - LLVMLink:
+          name: "vrshld_{type}"
+          links:
+            - link: "llvm.aarch64.neon.srshl.{type}"
+              arch: aarch64,arm64ec
+
+  - name: "vrshld_{type[0]}"
+    doc: "Unsigned rounding shift left"
+    arguments: ["a: {type[0]}", "b: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["u64", "i64"]
+    compose:
+      - LLVMLink:
+          name: "vrshld_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.urshl.{type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vrshrd_n_s64"
+    doc: "Signed rounding shift right"
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - ["i64", 'N >= 1 && N <= 64', '-N as i64']
+    compose:
+      - FnCall: [static_assert!, ["{type[1]}"]]
+      - FnCall: [vrshld_s64, [a, "{type[2]}"]]
+
+  - name: "vrshrd_n_u64"
+    doc: "Unsigned rounding shift right"
+    arguments: ["a: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - "u64"
+    compose:
+      - FnCall: [static_assert!, ['N >= 1 && N <= 64']]
+      - FnCall: ["vrshld_u64", [a, '-N as i64']]
+
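Reviewer note: AArch64 has no rounding-shift-right-by-register, so both `vrshrd_n_*` entries negate the immediate and reuse the rounding shift left. The rounding is to nearest, e.g.:

    use core::arch::aarch64::vrshrd_n_s64;
    // 7 / 2^2 = 1.75 rounds to 2, where a plain `7 >> 2` would give 1.
    assert_eq!(unsafe { vrshrd_n_s64::<2>(7) }, 2);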
+  - name: "vrshrn_high_n_{neon_type[1]}"
+    doc: "Rounding shift right narrow"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - FnCall:
+        - simd_shuffle!
+        - - a
+          - FnCall: ["vrshrn_n_{neon_type[1]}::<N>", [b]]
+          - "{type[4]}"
+
+  - name: "vrsubhn_high_{neon_type[1]}"
+    doc: "Rounding subtract returning high narrow"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+    compose:
+      - Let:
+        - x
+        - "{neon_type[0]}"
+        - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
+      - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
+
+  - name: "vcopy{neon_type[0].lane_nox}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int8x8_t, int8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int16x4_t, int16x4_t, int16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int32x2_t, int32x2_t, int32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
+      - Identifier: ["{type[5]}", Symbol]
+
+  - name: "vcopy{neon_type[0].laneq_nox}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t, int8x16_t, int8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int16x8_t, int16x8_t, int16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int32x4_t, int32x4_t, int32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int64x2_t, int64x2_t, int64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4', ' match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
+      - Identifier: ["{type[5]}", Symbol]
+
+  - name: "vcopyq_lane_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int64x2_t, int64x1_t, 'let b: int64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint64x2_t, uint64x1_t, 'let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly64x2_t, poly64x1_t, 'let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float64x2_t, float64x1_t, ' let b: float64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
+      - FnCall: [static_assert!, ['LANE2 == 0']]
+      - Identifier: ['{type[2]}', Symbol]
+
+  - name: "vcopyq_lane_f32"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x4_t, float32x2_t, ' let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, 2]]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, 1]]
+      - Identifier: ["{type[2]}", Symbol]
+
+  - name: "vcreate_f64"
+    doc: "Create a new vector from a 64-bit pattern"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["u64", float64x1_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
+  - name: "vset_lane_f64"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - ["f64", float64x1_t, float64x1_t]
+    compose:
+      - FnCall: [static_assert!, ['LANE == 0']]
+      - FnCall: [simd_insert!, [b, 'LANE as u32', a]]
+
+  - name: "vsetq_lane_f64"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - ["f64", float64x2_t, float64x2_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
+      - FnCall: [simd_insert!, [b, 'LANE as u32', a]]
+
+  - name: "vshld_s64"
+    doc: "Signed Shift left"
+    arguments: ["a: {type}", "b: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - "i64"
+    compose:
+      - FnCall:
+        - transmute
+        - - FnCall:
+            - vshl_s64
+            - - FnCall: [transmute, [a]]
+              - FnCall: [transmute, [b]]
+
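Reviewer note: the `Identifier: [..., Symbol]` compose pastes the per-type match verbatim into the generated body, so the int32x2 row of `vcopy_lane` above comes out roughly as (sketch; in-crate helpers like `simd_shuffle!` assumed in scope):

    pub unsafe fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(
        a: int32x2_t,
        b: int32x2_t,
    ) -> int32x2_t {
        static_assert_uimm_bits!(LANE1, 1);
        static_assert_uimm_bits!(LANE2, 1);
        match LANE1 & 0b1 {
            // Lane LANE2 of `b` (index 2 + LANE2 in the concatenation) replaces lane LANE1 of `a`.
            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
            _ => unreachable_unchecked(),
        }
    }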
+  - name: "vshld_{type[0]}"
+    doc: "Unsigned Shift left"
+    arguments: ["a: {type[0]}", "b: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["u64", "i64"]
+    compose:
+      - FnCall:
+        - transmute
+        - - FnCall:
+            - vshl_u64
+            - - FnCall: [transmute, [a]]
+              - FnCall: [transmute, [b]]
+
+  - name: "vshll_high_n_{neon_type[0]}"
+    doc: "Signed shift left long"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
+      - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
+      - FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
+
+  - name: "vshll_high_n_{neon_type[0]}"
+    doc: "Signed shift left long"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
+      - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
+      - FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
+
+  - name: "vshrn_high_n_{neon_type[1]}"
+    doc: "Shift right narrow"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+    compose:
+      - FnCall: [static_assert!, ["{type[3]}"]]
+      - FnCall:
+        - simd_shuffle!
+        - - a
+          - FnCall: ["vshrn_n_{neon_type[1]}::<N>", [b]]
+          - "{type[4]}"
+
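Reviewer note: `vshll_high_n` widens the upper half of the input; the shuffle in the compose is just the high-half extraction. An equivalent formulation through the public API:

    use core::arch::aarch64::{int8x16_t, int16x8_t, vget_high_s8, vshll_high_n_s8, vshll_n_s8};
    unsafe fn widen_top(a: int8x16_t) -> int16x8_t {
        // Same result as vshll_n_s8::<2>(vget_high_s8(a)), in one intrinsic.
        vshll_high_n_s8::<2>(a)
    }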
+  - name: "vsm3partw1{neon_type.no}"
+    doc: "SM3PARTW1"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sm4"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw1]]}]]
+      - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sm3partw1
+          links:
+            - link: "llvm.aarch64.crypto.sm3partw1"
+              arch: aarch64,arm64ec
+
+  - name: "vsm3partw2{neon_type.no}"
+    doc: "SM3PARTW2"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sm4"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3partw2]]}]]
+      - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sm3partw2
+          links:
+            - link: "llvm.aarch64.crypto.sm3partw2"
+              arch: aarch64,arm64ec
+
+  - name: "vsm3ss1{neon_type.no}"
+    doc: "SM3SS1"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sm4"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm3ss1]]}]]
+      - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sm3ss1
+          links:
+            - link: "llvm.aarch64.crypto.sm3ss1"
+              arch: aarch64,arm64ec
+
+  - name: "vsm4ekey{neon_type.no}"
+    doc: "SM4 key"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sm4"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4ekey]]}]]
+      - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sm4ekey
+          links:
+            - link: "llvm.aarch64.crypto.sm4ekey"
+              arch: aarch64,arm64ec
+
+  - name: "vsm4e{neon_type.no}"
+    doc: "SM4 encode"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sm4"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sm4e]]}]]
+      - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sm4e
+          links:
+            - link: "llvm.aarch64.crypto.sm4e"
+              arch: aarch64,arm64ec
+
+  - name: "vrax1{neon_type.no}"
+    doc: "Rotate and exclusive OR"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sha3"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rax1]]}]]
+      - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x2_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.rax1
+          links:
+            - link: "llvm.aarch64.crypto.rax1"
+              arch: aarch64,arm64ec
+
+  - name: "vsha512h{neon_type.no}"
+    doc: "SHA512 hash update part 1"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sha3"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h]]}]]
+      - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x2_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sha512h
+          links:
+            - link: "llvm.aarch64.crypto.sha512h"
+              arch: aarch64,arm64ec
+
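Reviewer note: unlike the earlier entries, the SM3/SM4 blocks are gated on `neon,sm4` (and the SHA3 group on `neon,sha3`) rather than plain `neon`, so callers need the extra feature check; a sketch of a plausible runtime gate (assuming `"sm4"` is an accepted detection string on this platform):

    #[cfg(target_arch = "aarch64")]
    fn have_sm4() -> bool {
        // Runtime check matching the `target_feature(enable = "neon,sm4")` gate above.
        std::arch::is_aarch64_feature_detected!("sm4")
    }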
+  - name: "vsha512h2{neon_type.no}"
+    doc: "SHA512 hash update part 2"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sha3"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512h2]]}]]
+      - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x2_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sha512h2
+          links:
+            - link: "llvm.aarch64.crypto.sha512h2"
+              arch: aarch64,arm64ec
+
+  - name: "vsha512su0{neon_type.no}"
+    doc: "SHA512 schedule update 0"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sha3"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su0]]}]]
+      - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x2_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sha512su0
+          links:
+            - link: "llvm.aarch64.crypto.sha512su0"
+              arch: aarch64,arm64ec
+
+  - name: "vsha512su1{neon_type.no}"
+    doc: "SHA512 schedule update 1"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sha3"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sha512su1]]}]]
+      - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x2_t
+    compose:
+      - LLVMLink:
+          name: llvm.aarch64.crypto.sha512su1
+          links:
+            - link: "llvm.aarch64.crypto.sha512su1"
+              arch: aarch64,arm64ec
+
+  - name: "vrnd32x{neon_type.no}"
+    doc: "Floating-point round to 32-bit integer, using current rounding mode"
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,frintts"']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32x]]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+      - float64x2_t
+    compose:
+      - LLVMLink:
+          name: "vrnd32x{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.frint32x.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vrnd32x{neon_type.no}"
+    doc: "Floating-point round to 32-bit integer, using current rounding mode"
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,frintts"']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32x]]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - float64x1_t
+    compose:
+      - LLVMLink:
+          name: "vrnd32x{neon_type.no}"
+          arguments:
+            - "a: f64"
+          return_type: "f64"
+          links:
+            - link: "llvm.aarch64.frint32x.f64"
+              arch: aarch64,arm64ec
+      - FnCall:
+        - transmute
+        - - FnCall:
+            - _vrnd32x_f64
+            - - FnCall: [simd_extract!, [a, 0]]
+
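Reviewer note: `float64x1_t` has no vector `frint32x` encoding, so the entry above goes through the scalar LLVM intrinsic and rebuilds the one-lane vector; the compose corresponds roughly to (sketch):

    pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
        extern "unadjusted" {
            #[link_name = "llvm.aarch64.frint32x.f64"]
            fn _vrnd32x_f64(a: f64) -> f64;
        }
        // Round lane 0 with the scalar op, then reinterpret back to a 1-lane vector.
        transmute(_vrnd32x_f64(simd_extract!(a, 0)))
    }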
return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32z]]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd32z{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint32z.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd32z{neon_type.no}" + doc: "Floating-point round to 32-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint32z]]}]] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd32z{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint32z.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: [_vrnd32z_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + + - name: "vrnd64x{neon_type.no}" + doc: "Floating-point round to 64-bit integer, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64x]]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd64x{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint64x.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd64x{neon_type.no}" + doc: "Floating-point round to 64-bit integer, using current rounding mode" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64x]]}]] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd64x{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint64x.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: [_vrnd64x_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + + - name: "vrnd64z{neon_type.no}" + doc: "Floating-point round to 64-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64z]]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + - float64x2_t + compose: + - LLVMLink: + name: "vrnd64z{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frint64z.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrnd64z{neon_type.no}" + doc: "Floating-point round to 64-bit integer toward zero" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: 
[target_feature, ['enable = "neon,frintts"']] + - FnCall: [unstable, ['feature = "stdarch_neon_ftts"', 'issue = "117227"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [frint64z]]}]] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - LLVMLink: + name: "vrnd64z{neon_type.no}" + arguments: + - "a: f64" + return_type: "f64" + links: + - link: "llvm.aarch64.frint64z.f64" + arch: aarch64,arm64ec + - FnCall: + - transmute + - - FnCall: [_vrnd64z_f64, [{FnCall: [simd_extract!, [a, 0]]}]] + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [int8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] + - [int16x4_t, '[0, 4, 2, 6]'] + - [int16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [int32x4_t, '[0, 4, 2, 6]'] + - [uint8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [uint8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] + - [uint16x4_t, '[0, 4, 2, 6]'] + - [uint16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [uint32x4_t, '[0, 4, 2, 6]'] + - [poly8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [poly8x16_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]'] + - [poly16x4_t, '[0, 4, 2, 6]'] + - [poly16x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]'] + - [float32x4_t, '[0, 4, 2, 6]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn1{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, '[0, 2]'] + - [int64x2_t, '[0, 2]'] + - [uint32x2_t, '[0, 2]'] + - [uint64x2_t, '[0, 2]'] + - [poly64x2_t, '[0, 2]'] + - [float32x2_t, '[0, 2]'] + - [float64x2_t, '[0, 2]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [int16x4_t, '[1, 5, 3, 7]'] + - [int16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int32x4_t, '[1, 5, 3, 7]'] + - [uint8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [uint16x4_t, '[1, 5, 3, 7]'] + - [uint16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint32x4_t, '[1, 5, 3, 7]'] + - [poly8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [poly8x16_t, '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [poly16x4_t, '[1, 5, 3, 7]'] + - [poly16x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [float32x4_t, '[1, 5, 3, 7]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vtrn2{neon_type[0].no}" + doc: Transpose vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: 
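+ # *neon-stable is a YAML alias; judging by the entries in this file that
+ # spell the attribute out inline, it presumably expands to
+ # FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']].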
+ - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, '[1, 3]'] + - [int64x2_t, '[1, 3]'] + - [uint32x2_t, '[1, 3]'] + - [uint64x2_t, '[1, 3]'] + - [poly64x2_t, '[1, 3]'] + - [float32x2_t, '[1, 3]'] + - [float64x2_t, '[1, 3]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip2{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [int16x4_t, '[2, 6, 3, 7]'] + - [int16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int32x2_t, '[1, 3]'] + - [int32x4_t, '[2, 6, 3, 7]'] + - [int64x2_t, '[1, 3]'] + - [uint8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [uint16x4_t, '[2, 6, 3, 7]'] + - [uint16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint32x2_t, '[1, 3]'] + - [uint32x4_t, '[2, 6, 3, 7]'] + - [uint64x2_t, '[1, 3]'] + - [poly8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [poly8x16_t, '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [poly16x4_t, '[2, 6, 3, 7]'] + - [poly16x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [poly64x2_t, '[1, 3]'] + - [float32x2_t, '[1, 3]'] + - [float32x4_t, '[2, 6, 3, 7]'] + - [float64x2_t, '[1, 3]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vzip1{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [int8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] + - [int16x4_t, '[0, 4, 1, 5]'] + - [int16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [int32x2_t, '[0, 2]'] + - [int32x4_t, '[0, 4, 1, 5]'] + - [int64x2_t, '[0, 2]'] + - [uint8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [uint8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] + - [uint16x4_t, '[0, 4, 1, 5]'] + - [uint16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [uint32x2_t, '[0, 2]'] + - [uint32x4_t, '[0, 4, 1, 5]'] + - [uint64x2_t, '[0, 2]'] + - [poly8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [poly8x16_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]'] + - [poly16x4_t, '[0, 4, 1, 5]'] + - [poly16x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]'] + - [poly64x2_t, '[0, 2]'] + - [float32x2_t, '[0, 2]'] + - [float32x4_t, '[0, 4, 1, 5]'] + - [float64x2_t, '[0, 2]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, '[0, 2]'] + - [int64x2_t, '[0, 2]'] + - [uint32x2_t, '[0, 2]'] + - [uint64x2_t, '[0, 2]'] + - [poly64x2_t, '[0, 2]'] + - [float32x2_t, '[0, 2]'] + - [float64x2_t, '[0, 2]'] 
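+ # For two-element vectors, taking the even-indexed lanes of (a, b) yields
+ # [a[0], b[0]], which is exactly what zip1 produces, so these vuzp1
+ # shapes (like the two-element vtrn1 shapes above) assert zip1 rather
+ # than uzp1.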
+ compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp1{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [int8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] + - [int16x4_t, '[0, 2, 4, 6]'] + - [int16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [int32x4_t, '[0, 2, 4, 6]'] + - [uint8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [uint8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] + - [uint16x4_t, '[0, 2, 4, 6]'] + - [uint16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [uint32x4_t, '[0, 2, 4, 6]'] + - [poly8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [poly8x16_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]'] + - [poly16x4_t, '[0, 2, 4, 6]'] + - [poly16x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]'] + - [float32x4_t, '[0, 2, 4, 6]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp2{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, '[1, 3]'] + - [int64x2_t, '[1, 3]'] + - [uint32x2_t, '[1, 3]'] + - [uint64x2_t, '[1, 3]'] + - [poly64x2_t, '[1, 3]'] + - [float32x2_t, '[1, 3]'] + - [float64x2_t, '[1, 3]'] + compose: + - FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]] + + - name: "vuzp2{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [int16x4_t, '[1, 3, 5, 7]'] + - [int16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int32x4_t, '[1, 3, 5, 7]'] + - [uint8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [uint16x4_t, '[1, 3, 5, 7]'] + - [uint16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint32x4_t, '[1, 3, 5, 7]'] + - [poly8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [poly8x16_t, '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [poly16x4_t, '[1, 3, 5, 7]'] + - [poly16x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [float32x4_t, '[1, 3, 5, 7]'] + compose: + - FnCall: + - "simd_shuffle!"
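+ # Block-style FnCall: the callee name first, then the argument list;
+ # presumably equivalent to the inline form used by the entries above,
+ # FnCall: ["simd_shuffle!", [a, b, "{type[1]}"]].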
+ - - a + - b + - "{type[1]}" + + - name: "vabal_high_{neon_type[1]}" + doc: "Unsigned Absolute difference and Accumulate Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal]]}]] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]', '[2, 3]'] + compose: + - Let: + - d + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]] + - Let: + - e + - "{neon_type[2]}" + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + - Let: [f, "{neon_type[2]}", {FnCall: ["vabd_{neon_type[2]}", [d, e]]}] + - FnCall: + - simd_add + - - a + - FnCall: [simd_cast, [f]] + + - name: "vabal_high{neon_type[1].noq}" + doc: Signed Absolute difference and Accumulate Long + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal]]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t] + - [int32x4_t, int16x8_t, int16x8_t, '[4, 5, 6, 7]', int16x4_t, uint16x4_t] + - [int64x2_t, int32x4_t, int32x4_t, '[2, 3]', int32x2_t, uint32x2_t] + compose: + - Let: + - d + - "{neon_type[4]}" + - FnCall: + - simd_shuffle! + - - b + - b + - "{type[3]}" + - Let: + - e + - "{neon_type[4]}" + - FnCall: + - simd_shuffle! + - - c + - c + - "{type[3]}" + - Let: + - f + - "{neon_type[4]}" + - FnCall: + - "vabd{neon_type[4].no}" + - - d + - e + - Let: + - f + - "{neon_type[5]}" + - FnCall: + - simd_cast + - - f + - FnCall: + - simd_add + - - a + - FnCall: + - simd_cast + - - f + + - name: "vqabs{neon_type.no}" + doc: Signed saturating Absolute value + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-stable + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] + safety: + unsafe: [neon] + types: + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "sqabs.{neon_type}" + links: + - link: "llvm.aarch64.neon.sqabs.{neon_type}" + arch: aarch64,arm64ec + + - name: "vslid_n_{type}" + doc: Shift left and insert + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + static_defs: + - "const N: i32" + attr: + - *neon-stable + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sli, 'N = 2']]}]] + safety: + unsafe: [neon] + types: + - i64 + - u64 + compose: + - FnCall: + - "static_assert!" 
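+ # Rough sketch of the intended expansion for the signed case (assuming
+ # the trailing "::" forwards the const generic N and i64 renders as the
+ # s64 name suffix):
+ #   static_assert!(N >= 0 && N <= 63);
+ #   transmute(vsli_n_s64::<N>(transmute(a), transmute(b)))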
+ - - 'N >= 0 && N <= 63' + - FnCall: + - transmute + - - FnCall: + - "vsli_n_{type}::" + - - FnCall: + - transmute + - - a + - FnCall: + - transmute + - - b + + - name: "vsrid_n_{type}" + doc: Shift right and insert + arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + static_defs: + - "const N: i32" + attr: + - *neon-stable + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sri, 'N = 2']]}]] + safety: + unsafe: [neon] + types: + - i64 + - u64 + compose: + - FnCall: + - "static_assert!" + - - 'N >= 1 && N <= 64' + - FnCall: + - transmute + - - FnCall: + - "vsri_n_{type}::" + - - FnCall: + - transmute + - - a + - FnCall: + - transmute + - - b + + - name: "vpmaxnm{neon_type.no}" + doc: "Floating-point Maximum Number Pairwise (vector)." + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float32x2_t + - float64x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vpmaxnm{neon_type}" + links: + - link: "llvm.aarch64.neon.fmaxnmp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut f64', float64x1x2_t, float64x1_t] + - ['*mut f64', float64x2x2_t, float64x2_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x{neon_type[1].tuple}.{neon_type[2]}.p0f64" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'a']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut f64', float64x1x3_t, float64x1_t] + - ['*mut f64', float64x2x3_t, float64x2_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x{neon_type[1].tuple}.{neon_type[2]}.p0f64" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'a']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['*mut f64', float64x1x4_t, float64x1_t] + - ['*mut f64', float64x2x4_t, float64x2_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: 
"llvm.aarch64.neon.st1x{neon_type[1].tuple}.{neon_type[2]}.p0f64" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', 'a']] + + - name: "vfma{type[3]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '1', '_lane_f32'] + - [float32x2_t, float32x4_t, '2', '_laneq_f32'] + - [float32x4_t, float32x2_t, '1', 'q_lane_f32'] + - [float32x4_t, float32x4_t, '2', 'q_laneq_f32'] + - [float64x2_t, float64x2_t, '1', 'q_laneq_f64'] + compose: + - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma_lane_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - FnCall: ["static_assert!", ["LANE == 0"]] + - FnCall: + - "vfma{neon_type.no}" + - - a + - b + - FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma_laneq_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: ["static_assert_uimm_bits!", ["LANE", "1"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfmaq_lane_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x2_t, float64x1_t] + compose: + - FnCall: ["static_assert!", ["LANE == 0"]] + - FnCall: + - "vfma{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]] + + - name: "vfma{type[2]}" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f32", float32x2_t, "s_lane_f32", '1'] + - ["f32", float32x4_t, "s_laneq_f32", '2'] + - ["f64", float64x2_t, "d_laneq_f64", '1'] + compose: + - LLVMLink: + name: "_vfma{type[2]}" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {type[0]}"] + links: + - link: "llvm.fma.{type[0]}" + arch: aarch64,arm64ec + - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] + - FnCall: ["_vfma{type[2]}", [b, c, a]] + + - name: "vfmad_lane_f64" + doc: "Floating-point fused multiply-add to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f64", float64x1_t] + compose: + - LLVMLink: + name: "_vfmad_lane_f64" + arguments: ["a: f64", "b: f64", "c: f64"] + links: + - link: "llvm.fma.{type[0]}" + arch: aarch64,arm64ec + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}] + - FnCall: ["_vfmad_lane_f64", [b, c, a]] + + - name: "vfms_f64" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: [vfma_f64, [a, b, c]] + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float64x2_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: [vfmaq_f64, [a, b, c]] + + - name: "vmls{neon_type.no}" + doc: "Floating-point multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - FnCall: [simd_sub, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vfms{type[3]}" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '1', _lane_f32] + - [float32x2_t, float32x4_t, '2', _laneq_f32] + - [float32x4_t, float32x2_t, '1', q_lane_f32] + - [float32x4_t, float32x4_t, '2', q_laneq_f32] + - [float64x2_t, 
float64x2_t, '1', q_laneq_f64] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[2]}']] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfms_lane_f64" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfms_laneq_f64" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfmsq_lane_f64" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x2_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]] + + - name: "vfms{type[2]}" + doc: "Floating-point fused multiply-subtract to accumulator" + arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f32", float32x2_t, "s_lane_f32"] + - ["f32", float32x4_t, "s_laneq_f32"] + - ["f64", float64x1_t, "d_lane_f64"] + - ["f64", float64x2_t, "d_laneq_f64"] + compose: + - FnCall: ["vfma{type[2]}::", ['a', '-b', 'c']] + + - name: "vceqz{neon_type[0].no}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, 'f32x2', 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, 'f32x4', 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, 'f64', '0.0'] + - [float64x2_t, uint64x2_t, 'f64x2', 
'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, '{type[2]}', '{type[3]}'] + - FnCall: [simd_eq, [a, {FnCall: [transmute, [b]]}]] + + - name: "vceqz{type[2]}" + doc: "Floating-point compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", "s_f32"] + - ["f64", "u64", "d_f64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vceqz_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - '0' + + - name: "vceqzd_{type[2]}" + doc: "Compare bitwise equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64", "s64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vceqz_{type[2]}" + - - FnCall: [transmute, [a]] + + - name: "vceqz{neon_type[0].no}" + doc: "Signed compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + - [poly8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [poly64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_eq + - - a + - FnCall: [transmute, [b]] + + - name: "vceqz{neon_type[0].no}" + doc: "Unsigned compare bitwise equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint8x16_t, uint8x16_t, u8x16, 'u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint16x4_t, uint16x4_t, u16x4, 'u16x4::new(0, 0, 0, 0)'] + - [uint16x8_t, uint16x8_t, u16x8, 'u16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint32x2_t, uint32x2_t, u32x2, 'u32x2::new(0, 0)'] + - [uint32x4_t, uint32x4_t, u32x4, 'u32x4::new(0, 0, 0, 0)'] + - [uint64x1_t, uint64x1_t, u64x1, 'u64x1::new(0)'] + - [uint64x2_t, uint64x2_t, u64x2, 'u64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_eq + - - a + - FnCall: [transmute, [b]] + + - name: "vcge{neon_type.no}" + doc: "Compare unsigned greater than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]] + 
- FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{type[0]}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcge_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - FnCall: ["vdup_n_{type[1]}", [b]] + - '0' + + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{type[0]}" + doc: "Compare greater than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["d_s64", "i64", "u64", s64] + - ["d_u64", "u64", "u64", u64] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcge_{type[3]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vclt{neon_type.no}" + doc: "Compare unsigned less than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - uint64x1_t + - uint64x2_t + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vcltd_{type[0]}" + doc: "Compare less than" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s64", "i64", "u64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vclt_{type[0]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vtst{neon_type[0].no}" + doc: "Unsigned compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint64x1_t, u64x1, 'u64x1::new(0)'] + - [uint64x2_t, u64x2, 'u64x2::new(0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[1]}", "{type[2]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vcgez{neon_type[0].no}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + 
unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_ge + - - a + - FnCall: [transmute, [b]] + + - name: "vcgez{type[0]}" + doc: "Floating-point compare greater than or equal to zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vcgez_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vclezd_s64" + doc: "Compare less than or equal to zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: [vclez_s64, [{FnCall: [transmute, [a]]}]] + + - name: "vcgtd_{type[2]}" + doc: "Compare greater than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64", 's64'] + - ["u64", "u64", 'u64'] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcgt_{type[2]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vcgtz{neon_type[0].no}" + doc: "Compare signed greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [int64x1_t, uint64x1_t, i64x1, 'i64x1::new(0)'] + - [int64x2_t, uint64x2_t, i64x2, 'i64x2::new(0, 0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: + - simd_gt + - - a + - FnCall: [transmute, [b]] + + - name: "vcgtzd_s64" + doc: "Compare signed greater than zero" + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["i64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - vcgtz_s64 + - - FnCall: [transmute, [a]] + + - name: "vcgtz{neon_type[0].no}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, 
f32x2, 'f32x2::new(0.0, 0.0)'] + - [float32x4_t, uint32x4_t, f32x4, 'f32x4::new(0.0, 0.0, 0.0, 0.0)'] + - [float64x1_t, uint64x1_t, f64, '0.0'] + - [float64x2_t, uint64x2_t, f64x2, 'f64x2::new(0.0, 0.0)'] + compose: + - Let: [b, "{type[2]}", "{type[3]}"] + - FnCall: [simd_gt, [a, {FnCall: [transmute, [b]]}]] + + - name: "vcgtz{type[0]}" + doc: "Floating-point compare greater than zero" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32", "u32"] + - ["d_f64", "f64", "u64"] + compose: + - FnCall: + - "simd_extract!" + - - FnCall: + - "vcgtz_{type[1]}" + - - FnCall: ["vdup_n_{type[1]}", [a]] + - '0' + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, "f64"] + - [float64x2_t, "f64"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul_lane_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - float64x1_t + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - simd_mul + - - a + - FnCall: + - "transmute::" + - - FnCall: [simd_extract!, [b, 'LANE as u32']] + + - name: "vmulq_lane_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x2_t, float64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: + - simd_mul + - - a + - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32, LANE as u32]']] + + - name: "vmuld_lane_f64" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f64", float64x1_t] + 
compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + - name: "vmul_laneq_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x1_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - simd_mul + - - a + - FnCall: + - "transmute::" + - - FnCall: [simd_extract!, [b, 'LANE as u32']] + + - name: "vmulq_laneq_f64" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float64x2_t, float64x2_t, float64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']] + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["f32", float32x2_t, "s_lane_f32", '1'] + - ["f32", float32x4_t, "s_laneq_f32", '2'] + - ["f64", float64x2_t, "d_laneq_f64", '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}] + - Identifier: ['a * b', Symbol] + + - name: "vrsrad_n_s64" + doc: "Signed rounding shift right and accumulate." 
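+ # The compose below amounts to (assuming the trailing "::" forwards N):
+ #   static_assert!(N >= 1 && N <= 64);
+ #   let b: i64 = vrshrd_n_s64::<N>(b);
+ #   a.wrapping_add(b)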
+ arguments: ["a: {type}", "b: {type}"] + return_type: "{type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - "i64" + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 64']] + - Let: [b, "{type}", {FnCall: ["vrshrd_n_s64::", [b]]}] + - Identifier: ['a.wrapping_add(b)', Symbol] + + - name: "vmlsl_high_n_{neon_type[1]}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, "i16"] + - [int64x2_t, int32x4_t, "i32"] + compose: + - FnCall: ["vmlsl_high_{neon_type[1]}", [a, b, {FnCall: ["vdupq_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_high_n_{neon_type[1]}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x8_t, "u16"] + - [uint64x2_t, uint32x4_t, "u32"] + compose: + - FnCall: ["vmlsl_high_{neon_type[1]}", [a, b, {FnCall: ["vdupq_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_high_lane{neon_type[2].no}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmlsl_high_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmlsl_high_lane{neon_type[2].no}" + doc: "Multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, 
'1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmlsl_high_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vclt{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vclt{type[2]}" + doc: "Floating-point compare less than" + arguments: ["a: {type[0]}", "b: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["f32", "u32", 's_f32'] + - ["f64", "u64", 'd_f64'] + compose: + - FnCall: + - simd_extract! + - - FnCall: + - "vclt_{type[0]}" + - - FnCall: ["vdup_n_{type[0]}", [a]] + - FnCall: ["vdup_n_{type[0]}", [b]] + - '0' + + - name: "vabdl_high_{neon_type[0]}" + doc: "Unsigned Absolute difference Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]'] + - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]'] + - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]'] + compose: + - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]}] + - Let: [d, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] + - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [c, d]]}]] + + - name: "vfms_n_f64" + doc: "Floating-point fused Multiply-subtract to accumulator (vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, "f64"] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vfmsq_n_f64" + doc: "Floating-point fused Multiply-subtract to accumulator (vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x2_t, "f64"] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vpminnm{type[0]}" + doc: "Floating-point minimum number pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['s_f32', float32x2_t, "f32"] + - ['qd_f64', float64x2_t, "f64"] + compose: + - 
LLVMLink: + name: "vpminnm{type[0]}" + links: + - link: "llvm.aarch64.neon.fminnmv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vpmaxnm{type[0]}" + doc: "Floating-point maximum number pairwise" + arguments: ["a: {neon_type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ['s_f32', float32x2_t, "f32"] + - ['qd_f64', float64x2_t, "f64"] + compose: + - LLVMLink: + name: "vpmaxnm{type[0]}" + links: + - link: "llvm.aarch64.neon.fmaxnmv.{type[2]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vcled_{type[0]}" + doc: "Compare less than or equal" + arguments: ["a: {type[1]}", "b: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s64", "i64", "u64"] + - ["u64", "u64", "u64"] + compose: + - FnCall: + - transmute + - - FnCall: + - "vcle_{type[0]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vqdmulh{neon_type[0].lane_nox}" + doc: "Vector saturating doubling multiply high by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int16x4_t, int16x4_t, '2'] + - [int16x8_t, int16x4_t, '2'] + - [int32x2_t, int32x2_t, '1'] + - [int32x4_t, int32x2_t, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: + - "vqdmulh{neon_type[0].no}" + - - a + - FnCall: + - "vdup{neon_type[0].N}" + - - FnCall: [simd_extract!, [b, 'LANE as u32']] + + - name: "vqabs{type[2]}" + doc: "Signed saturating absolute value" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] + safety: + unsafe: [neon] + types: + - ["i8", "s8", 'b_s8'] + - ["i16", "s16", 'h_s16'] + compose: + - FnCall: + - "simd_extract!" 
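+ # The b_s8/h_s16 scalar forms are composed from the vector intrinsic:
+ # broadcast the scalar with vdup_n, apply the vector vqabs, then extract
+ # lane 0; the i32/i64 forms below link to the LLVM sqabs intrinsic
+ # directly instead.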
+ - - FnCall: ["vqabs_{type[1]}", [{FnCall: ["vdup_n_{type[1]}", [a]]}]] + - '0' + + - name: "vqabs{type[1]}" + doc: "Signed saturating absolute value" + arguments: ["a: {type[0]}"] + return_type: "{type[0]}" + attr: + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]] + safety: + unsafe: [neon] + types: + - ["i32", "s_s32"] + - ["i64", "d_s64"] + compose: + - LLVMLink: + name: "vqabs{type[1]}" + links: + - link: "llvm.aarch64.neon.sqabs.{type[0]}" + arch: aarch64,arm64ec + + - name: "vmull_high_n_{neon_type[0]}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int16x8_t, "i16", int32x4_t] + - [int32x4_t, "i32", int64x2_t] + compose: + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: ["vdupq_n_{neon_type[0]}", [b]] + + - name: "vmull_high_n_{neon_type[0]}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint16x8_t, "u16", uint32x4_t] + - [uint32x4_t, "u32", uint64x2_t] + compose: + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: ["vdupq_n_{neon_type[0]}", [b]] + + - name: "vmull_high_lane{neon_type[1].no}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + + - name: "vmull_high_lane{neon_type[1].no}" + doc: "Multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint32x2_t, uint64x2_t, '1', 
'[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_high_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, '{type[4]}']] + + - name: "vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float64x1_t + - float64x2_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrsqrte{type[0]}" + doc: "Reciprocal square-root estimate." + arguments: ["a: {type[1]}"] + return_type: "{type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["s_f32", "f32"] + - ["d_f64", "f64"] + compose: + - LLVMLink: + name: "vrsqrte{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.frsqrte.{type[1]}" + arch: aarch64,arm64ec + + - name: "vpminnm{neon_type.no}" + doc: "Floating-point Minimum Number Pairwise (vector)." + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - float32x2_t + - float64x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vpminnm{neon_type.no}" + links: + - link: "llvm.aarch64.neon.fminnmp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshlu{type[0]}" + doc: "Signed saturating shift left unsigned" + arguments: ["a: {type[1]}"] + return_type: "{type[2]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [b_n_s8, i8, u8, '3', s8] + - [h_n_s16, i16, u16, '4', s16] + - [s_n_s32, i32, u32, '5', s32] + - [d_n_s64, i64, u64, '6', s64] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: + - simd_extract! 
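+      # As above, the scalar form goes through the vector op, but here the
+      # shift amount N is a const generic and must be forwarded explicitly
+      # with a turbofish. Sketch of the generated vqshlub_n_s8 (assumed shape):
+      #   pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
+      #       static_assert_uimm_bits!(N, 3);
+      #       simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0)
+      #   }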
+ - - FnCall: + - "vqshlu_n_{type[4]}::" + - - FnCall: ["vdup_n_{type[4]}", [a]] + - '0' + + - name: "vcvta{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + - [float64x1_t, uint64x1_t] + - [float64x2_t, uint64x2_t] + compose: + - LLVMLink: + name: "vcvta{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.fcvtau.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [float64x1_t, int64x1_t] + - [float64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmlal_high_n_{neon_type[1]}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, "i16"] + - [int64x2_t, int32x4_t, "i32"] + compose: + - FnCall: + - "vmlal_high_{neon_type[1]}" + - - a + - b + - FnCall: ["vdupq_n_{neon_type[1]}", [c]] + + - name: "vmlal_high_n_{neon_type[1]}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x8_t, "u16"] + - [uint64x2_t, uint32x4_t, "u32"] + compose: + - FnCall: + - "vmlal_high_{neon_type[1]}" + - - a + - b + - FnCall: ["vdupq_n_{neon_type[1]}", [c]] + + - name: "vmlal_high_lane{neon_type[2].no}" + doc: "Multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: 
+
+  - name: "vmlal_high_lane{neon_type[2].no}"
+    doc: "Multiply-add long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
+      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
+
+  - name: "vrsrad_n_u64"
+    doc: "Unsigned rounding shift right and accumulate."
+    arguments: ["a: {type}", "b: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - "u64"
+    compose:
+      - FnCall: [static_assert!, ['N >= 1 && N <= 64']]
+      - Let: [b, u64, {FnCall: ["vrshrd_n_u64::<N>", [b]]}]
+      - Identifier: ['a.wrapping_add(b)', Symbol]
+
+  - name: "vcle{neon_type.no}"
+    doc: "Compare unsigned less than or equal"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x1_t
+      - uint64x2_t
+    compose:
+      - FnCall: [simd_le, [a, b]]
+
+  - name: "vld4{neon_type[1].dup_nox}"
+    doc: "Load single 4-element structure and replicate to all lanes of four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const i64", int64x2x4_t, "v2i64", "p0i64"]
+      - ["*const f64", float64x1x4_t, "v1f64", "p0f64"]
+      - ["*const f64", float64x2x4_t, "v2f64", "p0f64"]
+    compose:
+      - LLVMLink:
+          name: "vld4{neon_type[1].dup_nox}"
+          arguments:
+            - "ptr: {type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.ld4r.{type[2]}.{type[3]}"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as _']]
+
+  - name: "vld4{neon_type[1].dup_nox}"
+    doc: "Load single 4-element structure and replicate to all lanes of four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const u64", uint64x2x4_t, "q_dup_s64"]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall: ["vld4{type[2]}", [{FnCall: [transmute, [a]]}]]
+
+  - name: "vld4{neon_type[1].dup_nox}"
+    doc: "Load single 4-element structure
and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x2x4_t, "q_dup_s64"] + compose: + - FnCall: + - transmute + - - FnCall: ["vld4{type[2]}", [{FnCall: [transmute, [a]]}]] + diff --git a/crates/stdarch-gen2/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen2/spec/neon/arm_shared.spec.yml new file mode 100644 index 0000000000..87559963ed --- /dev/null +++ b/crates/stdarch-gen2/spec/neon/arm_shared.spec.yml @@ -0,0 +1,9789 @@ +arch_cfgs: + - arch_name: aarch64 + target_feature: [neon] + llvm_prefix: llvm.aarch64.neon + +# Repeatedly used anchors +# #[stable(feature = "neon_intrinsics", since = "1.59.0")] +neon-stable: &neon-stable + FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + +# #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +neon-unstable: &neon-unstable + FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + +# #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +neon-v7: &neon-v7 + FnCall: [cfg_attr, ['target_arch = "arm"', { FnCall: [target_feature, [ 'enable = "v7"']]} ]] + +# #[target_feature(enable = "neon,v7")] +enable-v7: &enable-v7 + FnCall: [target_feature, ['enable = "neon,v7"']] + +# #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +neon-v8: &neon-v8 + FnCall: [cfg_attr, ['target_arch = "arm"', { FnCall: [target_feature, [ 'enable = "v8"']]} ]] + +target-is-arm: &target-is-arm + FnCall: [cfg, ['target_arch = "arm"']] + +# #[cfg(not(target_arch = "arm"))] +target-not-arm: &target-not-arm + FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]] + +neon-target-aarch64-arm64ec: &neon-target-aarch64-arm64ec + FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]] + +# #[cfg_attr(not(target_arch = "arm"), stable(feature = "neon_intrinsics", since = "1.59.0"))] +neon-stable-not-arm: &neon-stable-not-arm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, *neon-stable]] + +#[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))] +neon-unstable-is-arm: &neon-unstable-is-arm + FnCall: [ cfg_attr, ['target_arch = "arm"', *neon-unstable]] + +# #[cfg_attr(all(test, not(target_env = "msvc"))] +msvc-disabled: &msvc-disabled + FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]] + +# all(test, target_arch = "arm") +test-is-arm: &test-is-arm + FnCall: [all, [test, 'target_arch = "arm"']] + +# #[target_feature(enable = "neon,aes")] +neon-aes: &neon-aes + FnCall: [target_feature, ['enable = "neon,aes"']] + +# #[target_feature(enable = "neon,i8mm")] +neon-i8mm: &neon-i8mm + FnCall: [target_feature, ['enable = "neon,i8mm"']] + +#[cfg_attr(not(target_arch = "arm"), unstable(feature = "stdarch_neon_i8mm", issue = "117223"))] +neon-unstable-i8mm: &neon-unstable-i8mm + FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']] }, { FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']] } ]] + +# #[unstable(feature = "stdarch_neon_fcma", issue = "117222")] +neon-unstable-fcma: &neon-unstable-fcma + FnCall: [unstable, ['feature = "stdarch_neon_fcma"', 'issue = "117222"']] + +intrinsics: + - name: "vand{neon_type.no}" + doc: Vector bitwise and + 
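+    # The attr lists in this file expand the YAML anchors defined above (e.g.
+    # *neon-v7, *neon-stable-not-arm, *neon-unstable-is-arm), so each entry
+    # only names the attributes unique to it, such as assert_instr. For vand
+    # the generator emits roughly this shape (a sketch, stability attributes
+    # omitted):
+    #   #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+    #   #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
+    #   pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #       simd_and(a, b)
+    #   }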
arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vand]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [and]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - int64x1_t + - int64x2_t + - uint64x1_t + - uint64x2_t + compose: + - FnCall: + - simd_and + - - a + - b + + - name: "vorr{neon_type.no}" + doc: "Vector bitwise or (immediate, inclusive)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [orr]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - int64x1_t + - int64x2_t + - uint64x1_t + - uint64x2_t + compose: + - FnCall: + - simd_or + - - a + - b + + - name: "veor{neon_type.no}" + doc: Vector bitwise exclusive or (vector) + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [veor]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [eor]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + - int64x1_t + - int64x2_t + - uint64x1_t + - uint64x2_t + compose: + - FnCall: + - simd_xor + - - a + - b + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sabd]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "sabd.{neon_type}" + links: + - link: "llvm.aarch64.neon.sabd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vabds.{neon_type}" + arch: arm + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the arguments + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uabd]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "uabd.{neon_type}" + links: + - link: "llvm.aarch64.neon.uabd.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vabdu.{neon_type}" + arch: arm + + - name: "vabd{neon_type.no}" + doc: Absolute difference between the 
floating-point arguments
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.f32"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabd]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+    compose:
+      - LLVMLink:
+          name: "fabd.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vabds.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fabd.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vabdl{neon_type[0].noq}"
+    doc: Signed Absolute difference Long
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - target_feature
+                - - 'enable = "v7"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - '"vabdl.{neon_type[0]}"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - sabdl
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int16x8_t, uint8x8_t]
+      - [int16x4_t, int32x4_t, uint16x4_t]
+      - [int32x2_t, int64x2_t, uint32x2_t]
+    compose:
+      - Let:
+          - c
+          - "{neon_type[2]}"
+          - FnCall:
+              - simd_cast
+              - - FnCall:
+                    - "vabd_{neon_type[0]}"
+                    - - a
+                      - b
+      - FnCall:
+          - simd_cast
+          - - c
+
+  - name: "vceq{neon_type[0].no}"
+    doc: "Compare bitwise Equal (vector)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vceq{type[2]}"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmeq]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, uint8x8_t, ".i8"]
+      - [uint8x16_t, uint8x16_t, ".i8"]
+      - [int8x8_t, uint8x8_t, ".i8"]
+      - [int8x16_t, uint8x16_t, ".i8"]
+      - [poly8x8_t, uint8x8_t, ".i8"]
+      - [poly8x16_t, uint8x16_t, ".i8"]
+      - [uint16x4_t, uint16x4_t, ".i16"]
+      - [uint16x8_t, uint16x8_t, ".i16"]
+      - [int16x4_t, uint16x4_t, ".i16"]
+      - [int16x8_t, uint16x8_t, ".i16"]
+      - [uint32x2_t, uint32x2_t, ".i32"]
+      - [uint32x4_t, uint32x4_t, ".i32"]
+      - [int32x2_t, uint32x2_t, ".i32"]
+      - [int32x4_t, uint32x4_t, ".i32"]
+    compose:
+      - FnCall: [simd_eq, [a, b]]
+
+  - name: "vceq{neon_type[0].no}"
+    doc: "Floating-point compare equal"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]},
{FnCall: [assert_instr, ['"vceq.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmeq]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_eq, [a, b]] + + - name: "vtst{neon_type[0].no}" + doc: "Signed compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vtst]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmtst]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [int16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [int16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [int32x2_t, uint32x2_t, i32x2, 'i32x2::new(0, 0)'] + - [int32x4_t, uint32x4_t, i32x4, 'i32x4::new(0, 0, 0, 0)'] + - [poly8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly8x16_t, uint8x16_t, i8x16, 'i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)'] + - [poly16x4_t, uint16x4_t, i16x4, 'i16x4::new(0, 0, 0, 0)'] + - [poly16x8_t, uint16x8_t, i16x8, 'i16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[2]}", "{type[3]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vabs{neon_type.no}" + doc: "Floating-point absolute value" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vabs]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fabs]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_fabs, [a]] + + - name: "vcgt{neon_type[0].no}" + doc: "Compare signed greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = 
"arm"']]}, {FnCall: [assert_instr, ['"vcgt.{type[2]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, "s8"] + - [int8x16_t, uint8x16_t, "s8"] + - [int16x4_t, uint16x4_t, s16] + - [int16x8_t, uint16x8_t, s16] + - [int32x2_t, uint32x2_t, "s32"] + - [int32x4_t, uint32x4_t, "s32"] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type.no}" + doc: "Compare unsigned greater than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.{neon_type}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmhi]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vcgt{neon_type[0].no}" + doc: "Floating-point compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_gt, [a, b]] + + - name: "vclt{neon_type[0].no}" + doc: "Compare signed less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmgt]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + 
- [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Compare signed less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcle{neon_type[0].no}" + doc: "Floating-point compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vcge{neon_type[0].no}" + doc: "Compare signed greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmge]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t] + - [int8x16_t, uint8x16_t] + - [int16x4_t, uint16x4_t] + - [int16x8_t, uint16x8_t] + - [int32x2_t, uint32x2_t] + - [int32x4_t, uint32x4_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcls{neon_type.no}" + doc: "Count leading sign bits" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: 
[cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcls.{neon_type}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "vcls{neon_type.no}" + links: + - link: "llvm.arm.neon.vcls.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.cls.{neon_type}" + arch: aarch64,arm64ec + + - name: "vcls{neon_type[0].no}" + doc: "Count leading sign bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcls]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + compose: + - FnCall: + - "vcls{neon_type[1].no}" + - - FnCall: [transmute, [a]] + + - name: "vclz{neon_type.no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclz.i8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [clz]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + compose: + - FnCall: ["vclz{neon_type.no}_", [a]] + + - name: "vclz{neon_type[0].no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclz.i8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [clz]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 
'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vclz{neon_type[1].no}_" + - - FnCall: [transmute, [a]] + + - name: "vclz{neon_type[0].no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclz{type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [clz]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, '.i16'] + - [int16x8_t, '.i16'] + - [int32x2_t, '.i32'] + - [int32x4_t, '.i32'] + compose: + - FnCall: ["vclz{neon_type[0].no}_", [a]] + + - name: "vclz{neon_type[0].no}" + doc: "Count leading zero bits" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclz{type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [clz]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint32x2_t, '.i32', int32x2_t] + - [uint32x4_t, '.i32', int32x4_t] + - [uint16x4_t, '.i16', int16x4_t] + - [uint16x8_t, '.i16', int16x8_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vclz{neon_type[2].no}_" + - - FnCall: [transmute, [a]] + + - name: "vcagt{neon_type[0].no}" + doc: "Floating-point absolute compare greater than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacgt.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - LLVMLink: + name: "vcagt{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacgt.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facgt.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcage{neon_type[0].no}" + doc: "Floating-point absolute compare greater than or equal" + arguments: ["a: 
{neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacge.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - LLVMLink: + name: "vcage{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vacge.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.facge.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vcalt{neon_type[0].no}" + doc: "Floating-point absolute compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacgt.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facgt]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: ["vcagt{neon_type[0].no}", [b, a]] + + - name: "vcale{neon_type[0].no}" + doc: "Floating-point absolute compare less than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vacge.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [facge]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: ["vcage{neon_type[0].no}", [b, a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [scvtf]]}]] + - 
FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ucvtf]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [simd_cast, [a]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.arm.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]] + - FnCall: 
[rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int32x2_t, float32x2_t] + - [int32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxs2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", [a, N]] + + - name: "vcvt{neon_type[1].N}_{neon_type[0]}" + doc: "Fixed-point convert to floating-point" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint32x2_t, float32x2_t] + - [uint32x4_t, float32x4_t] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{neon_type[1].N}_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t, _n_s32_f32] + - [float32x4_t, int32x4_t, q_n_s32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.arm.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vcvt, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, _n_u32_f32] + - [float32x4_t, uint32x4_t, q_n_u32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.arm.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: arm + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]] + - 
FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t, _n_s32_f32] + - [float32x4_t, int32x4_t, q_n_s32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: ["a: {type[0]}", "n: i32"] + links: + - link: "llvm.aarch64.neon.vcvtfp2fxs.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vcvt{type[2]}" + doc: "Floating-point convert to fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t, _n_u32_f32] + - [float32x4_t, uint32x4_t, q_n_u32_f32] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= 32']] + - LLVMLink: + name: "vcvt{type[2]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.vcvtfp2fxu.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vcvt{type[2]}", [a, N]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.8"', 'N = 4']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 4']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + 
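+    # Every dup-lane entry follows the same recipe: static_assert_uimm_bits!
+    # bounds N (type[3] is the index bit width), then simd_shuffle! repeats
+    # lane N once per output lane (the mask in type[4]). Sketch of the
+    # generated vdup_lane_s8 from the preceding entry (assumed shape):
+    #   pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    #       static_assert_uimm_bits!(N, 3);
+    #       simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32,
+    #           N as u32, N as u32, N as u32, N as u32])
+    #   }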
arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.8"', 'N = 8']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 8']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] + - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] + + - name: "vdup{type[0]}" + doc: "Set all vector lanes to the same value" + arguments: ["a: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + 
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
+      - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.16"', 'N = 4']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 4']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
+      - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.32"', 'N = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [_lane_s32, int32x2_t, int32x2_t, '1', '[N as u32, N as u32]']
+      - [q_lane_s32, int32x2_t, int32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[N as u32, N as u32]']
+      - [q_lane_u32, uint32x2_t, uint32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_f32, float32x2_t, float32x2_t, '1', '[N as u32, N as u32]']
+      - [q_lane_f32, float32x2_t, float32x4_t, '1', '[N as u32, N as u32, N as u32, N as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
+      - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vdup.32"', 'N = 2']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
+      - [_laneq_s32, int32x4_t, int32x2_t, '2', '[N as u32, N as u32]']
+      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
+      - [_laneq_u32, uint32x4_t, uint32x2_t, '2', '[N as u32, N as u32]']
+      - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
+      - [_laneq_f32, float32x4_t, float32x2_t, '2', '[N as u32, N as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
+      - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmov, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [q_laneq_s64, int64x2_t, '1', '[N as u32, N as u32]']
+      - [q_laneq_u64, uint64x2_t, '1', '[N as u32, N as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
+      - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmov, 'N = 0']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [dup, 'N = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [q_lane_s64, int64x1_t, int64x2_t]
+      - [q_lane_u64, uint64x1_t, uint64x2_t]
+    compose:
+      - FnCall: [static_assert!, ['N == 0']]
+      - FnCall: [simd_shuffle!, [a, a, '[N as u32, N as u32]']]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [_lane_s64, int64x1_t]
+      - [_lane_u64, uint64x1_t]
+    compose:
+      - FnCall: [static_assert!, ['N == 0']]
+      - Identifier: [a, Symbol]
+
+  - name: "vdup{type[0]}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmov, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [_laneq_s64, int64x2_t, int64x1_t, '::']
+      - [_laneq_u64, uint64x2_t, uint64x1_t, '::']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, 1]]
+      - FnCall:
+          - "transmute{type[3]}"
+          - - FnCall: [simd_extract!, [a, 'N as u32']]
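These `vdup{type[0]}` rows all lower to a single `simd_shuffle!` that repeats lane `N`. As a reviewer's sketch (not part of the patch), here is what one generated instantiation, `vdupq_laneq_s16` from the `q_laneq_s16` row, behaves like on an AArch64 target; the `demo_*` function name is ours:

```rust
// Broadcast lane 3 of an 8x16-bit vector into every lane.
#[cfg(target_arch = "aarch64")]
fn demo_vdup_laneq() {
    use core::arch::aarch64::*;
    let src: [i16; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
    let mut dst = [0i16; 8];
    unsafe {
        let v = vld1q_s16(src.as_ptr());
        // The spec's `simd_shuffle!(a, a, [N; 8])` with N = 3.
        vst1q_s16(dst.as_mut_ptr(), vdupq_laneq_s16::<3>(v));
    }
    assert_eq!(dst, [13; 8]);
}
```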
+  - name: "vext{neon_type[0].no}"
+    doc: "Extract vector from pair of vectors"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 7']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [int16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [uint8x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [uint16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [poly8x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [poly16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+    compose:
+      - Identifier: ["{type[1]}", Symbol]
+
+  - name: "vext{neon_type[0].no}"
+    doc: "Extract vector from pair of vectors"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 15']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 15']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t, ' static_assert_uimm_bits!(N, 4); match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+      - [uint8x16_t, ' static_assert_uimm_bits!(N, 4); match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+      - [poly8x16_t, ' static_assert_uimm_bits!(N, 4); match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+    compose:
+      - Identifier: ["{type[1]}", Symbol]
+
+  - name: "vext{neon_type[0].no}"
+    doc: "Extract vector from pair of vectors"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 3']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, 'static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [int32x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [uint16x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [uint32x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [poly16x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [float32x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+    compose:
+      - Identifier: ["{type[1]}", Symbol]
+
+  - name: "vext{neon_type[0].no}"
+    doc: "Extract vector from pair of vectors"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vext.8"', 'N = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [uint32x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [float32x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+    compose:
+      - Identifier: ["{type[1]}", Symbol]
+
+  - name: "vext{neon_type[0].no}"
+    doc: "Extract vector from pair of vectors"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmov, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ext, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int64x2_t, 'static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [uint64x2_t, 'static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+    compose:
+      - Identifier: ["{type[1]}", Symbol]
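The `vext` rows encode a byte-wise extract: conceptually `a` and `b` are concatenated and a one-vector-wide window starting at lane `N` is returned, which is why each compose body is an exhaustive `match`/`simd_shuffle!` table. A reviewer's sketch (not part of the patch) of the generated `vext_s8` on AArch64; the `demo_*` name is ours:

```rust
#[cfg(target_arch = "aarch64")]
fn demo_vext() {
    use core::arch::aarch64::*;
    let a: [i8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
    let b: [i8; 8] = [8, 9, 10, 11, 12, 13, 14, 15];
    let mut out = [0i8; 8];
    unsafe {
        // N = 3: lanes 3..11 of the 16-lane concatenation a:b,
        // i.e. the `3 => simd_shuffle!(...)` arm of the match table.
        let r = vext_s8::<3>(vld1_s8(a.as_ptr()), vld1_s8(b.as_ptr()));
        vst1_s8(out.as_mut_ptr(), r);
    }
    assert_eq!(out, [3, 4, 5, 6, 7, 8, 9, 10]);
}
```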
[assert_instr, ['"vmla{type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mla]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, ".i8"] + - [int8x16_t, ".i8"] + - [uint8x8_t, ".i8"] + - [uint8x16_t, ".i8"] + - [int16x4_t, ".i16"] + - [int16x8_t, ".i16"] + - [uint16x4_t, ".i16"] + - [uint16x8_t, ".i16"] + - [int32x2_t, ".i32"] + - [int32x4_t, ".i32"] + - [uint32x2_t, ".i32"] + - [uint32x4_t, ".i32"] + compose: + - FnCall: [simd_add, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vmla{neon_type.no}" + doc: "Floating-point multiply-add to accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_add, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vmlal{neon_type[1].no}" + doc: "Signed multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlal.{type[2]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smlal]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, "s8"] + - [int32x4_t, int16x4_t, "s16"] + - [int64x2_t, int32x2_t, "s32"] + compose: + - FnCall: [simd_add, [a, {FnCall: ["vmull_{type[2]}", [b, c]]}]] + + - name: "vmlal_n_{type[4]}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlal.{type[4]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smlal]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = 
"neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x4_t, "i16", int32x4_t, 's16'] + - [int64x2_t, int32x2_t, "i32", int64x2_t, 's32'] + compose: + - FnCall: + - "vmlal{neon_type[1].noq}" + - - a + - b + - FnCall: ["vdup_n_{neon_type[1]}", [c]] + + - name: "vmlal_n_{type[2]}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlal.{type[2]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlal]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x4_t, "u16", uint32x4_t] + - [uint64x2_t, uint32x2_t, "u32", uint64x2_t] + compose: + - FnCall: + - "vmlal{neon_type[1].noq}" + - - a + - b + - FnCall: ["vdup_n_{neon_type[1]}", [c]] + + - name: "vmlal_lane{neon_type[2].no}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlal.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smlal, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int64x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [int64x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmlal_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + + - name: "vmlal_lane{neon_type[2].no}" + doc: "Vector widening multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlal.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: 
[any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlal, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: + - "vmlal_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, '{type[5]}']] + + - name: "vmlal_{neon_type[1]}" + doc: "Unsigned multiply-add long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlal.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlal]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t] + - [uint32x4_t, uint16x4_t] + - [uint64x2_t, uint32x2_t] + compose: + - FnCall: + - simd_add + - - a + - FnCall: ["vmull_{neon_type[1]}", [b, c]] + + - name: "vmls{neon_type[0].no}" + doc: "Multiply-subtract from accumulator" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls{type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, '.i8'] + - [int8x16_t, '.i8'] + - [uint8x8_t, '.i8'] + - [uint8x16_t, '.i8'] + - [int16x4_t, ".i16"] + - [int16x8_t, ".i16"] + - [uint16x4_t, ".i16"] + - [uint16x8_t, ".i16"] + - [int32x2_t, ".i32"] + - [int32x4_t, ".i32"] + - [uint32x2_t, ".i32"] + - [uint32x4_t, ".i32"] + compose: + - FnCall: + - simd_sub + - - a + - FnCall: [simd_mul, [b, c]] + + - name: "vmlsl_{neon_type[1]}" + doc: "Signed multiply-subtract long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: 
"{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smlsl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t] + - [int32x4_t, int16x4_t] + - [int64x2_t, int32x2_t] + compose: + - FnCall: [simd_sub, [a, {FnCall: ["vmull_{neon_type[1]}", [b, c]]}]] + + - name: "vmlsl_n_{neon_type[1]}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smlsl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x4_t, "i16"] + - [int64x2_t, int32x2_t, "i32"] + compose: + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, {FnCall: ["vdup_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_n_{neon_type[1]}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlsl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x4_t, "u16"] + - [uint64x2_t, uint32x2_t, "u32"] + compose: + - FnCall: ["vmlsl_{neon_type[1]}", [a, b, {FnCall: ["vdup_n_{neon_type[1]}", [c]]}]] + + - name: "vmlsl_lane{neon_type[2].no}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = 
"arm64ec"']]}]]}, {FnCall: [assert_instr, [smlsl, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmlsl_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmlsl_lane{neon_type[2].no}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smlsl, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int64x2_t, int32x2_t, int32x2_t, '[LANE as u32, LANE as u32]', '1'] + - [int64x2_t, int32x2_t, int32x4_t, '[LANE as u32, LANE as u32]', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]] + - FnCall: + - "vmlsl_{neon_type[1]}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] + + - name: "vmlsl_lane{neon_type[2].no}" + doc: "Vector widening multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlsl, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: 
+  - name: "vmlsl_lane{neon_type[2].no}"
+    doc: "Vector widening multiply subtract with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"', 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlsl, 'LANE = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const LANE: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
+      - [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
+      - FnCall:
+          - "vmlsl_{neon_type[1]}"
+          - - a
+            - b
+            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+
+  - name: "vmlsl_{neon_type[1]}"
+    doc: "Unsigned multiply-subtract long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmlsl.{neon_type[1]}"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umlsl]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x8_t, uint8x8_t]
+      - [uint32x4_t, uint16x4_t]
+      - [uint64x2_t, uint32x2_t]
+    compose:
+      - FnCall: [simd_sub, [a, {FnCall: ["vmull_{neon_type[1]}", [b, c]]}]]
+
+  - name: "vneg{neon_type[0].no}"
+    doc: Negate
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [neg]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 's8']
+      - [int8x16_t, 's8']
+      - [int16x4_t, 's16']
+      - [int16x8_t, 's16']
+      - [int32x2_t, 's32']
+      - [int32x4_t, 's32']
+    compose:
+      - FnCall: [simd_neg, [a]]
+
+  - name: "vneg{neon_type[0].no}"
+    doc: Negate
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x2_t, 'f32']
+      - [float32x4_t, 'f32']
+    compose:
+      - FnCall: [simd_neg, [a]]
+
+  - name: "vqneg{neon_type[0].no}"
+    doc: Signed saturating negate
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqneg.{type[1]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqneg]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 's8', 'i8']
+      - [int8x16_t, 's8', 'i8']
+      - [int16x4_t, 's16', 'i16']
+      - [int16x8_t, 's16', 'i16']
+      - [int32x2_t, 's32', 'i32']
+      - [int32x4_t, 's32', 'i32']
+    compose:
+      - LLVMLink:
+          name: "sqneg.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.sqneg.v{neon_type[0].lane}{type[2]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vqneg.v{neon_type[0].lane}{type[2]}"
+              arch: arm
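The difference between `vneg` (plain `simd_neg`, i.e. wrapping) and `vqneg` (the LLVM `sqneg`/`vqneg` links) only shows at the type minimum. Sketch (AArch64, `demo_*` name ours):

```rust
#[cfg(target_arch = "aarch64")]
fn demo_neg_vs_qneg() {
    use core::arch::aarch64::*;
    let (mut plain, mut sat) = ([0i8; 8], [0i8; 8]);
    unsafe {
        let v = vdup_n_s8(i8::MIN);
        vst1_s8(plain.as_mut_ptr(), vneg_s8(v)); // wraps: -(-128) = -128
        vst1_s8(sat.as_mut_ptr(), vqneg_s8(v));  // saturates to 127
    }
    assert_eq!(plain, [i8::MIN; 8]);
    assert_eq!(sat, [i8::MAX; 8]);
}
```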
+  - name: "vqsub{neon_type[0].no}"
+    doc: Saturating subtract
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqsub.{type[1]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqsub]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, u8, i8]
+      - [uint8x16_t, u8, i8]
+      - [uint16x4_t, u16, i16]
+      - [uint16x8_t, u16, i16]
+      - [uint32x2_t, u32, i32]
+      - [uint32x4_t, u32, i32]
+      - [uint64x1_t, u64, i64]
+      - [uint64x2_t, u64, i64]
+    compose:
+      - LLVMLink:
+          name: "uqsub.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.uqsub.v{neon_type[0].lane}{type[2]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.usub.sat.v{neon_type[0].lane}{type[2]}"
+              arch: arm
+
+  - name: "vqsub{neon_type[0].no}"
+    doc: Saturating subtract
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqsub.{type[1]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqsub]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, s8, i8]
+      - [int8x16_t, s8, i8]
+      - [int16x4_t, s16, i16]
+      - [int16x8_t, s16, i16]
+      - [int32x2_t, s32, i32]
+      - [int32x4_t, s32, i32]
+      - [int64x1_t, s64, i64]
+      - [int64x2_t, s64, i64]
+    compose:
+      - LLVMLink:
+          name: "sqsub.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.sqsub.v{neon_type[0].lane}{type[2]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.ssub.sat.v{neon_type[0].lane}{type[2]}"
+              arch: arm
+
+  - name: "vhadd{neon_type.no}"
+    doc: Halving add
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - target_feature
+                - - 'enable = "v7"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - '"vhadd.{neon_type}"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - uhadd
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - uint8x16_t
+      - uint16x4_t
+      - uint16x8_t
+      - uint32x2_t
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: "uhadd.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.uhadd.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vhaddu.{neon_type}"
+              arch: arm
+
+  - name: "vhadd{neon_type.no}"
+    doc: Halving add
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - target_feature
+                - - 'enable = "v7"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - '"vhadd.{neon_type}"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - shadd
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int8x16_t
+      - int16x4_t
+      - int16x8_t
+      - int32x2_t
+      - int32x4_t
+    compose:
+      - LLVMLink:
+          name: "shadd.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.shadd.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vhadds.{neon_type}"
+              arch: arm
+
+  - name: "vrhadd{neon_type.no}"
+    doc: Rounding halving add
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vrhadd.{neon_type}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [srhadd]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int8x16_t
+      - int16x4_t
+      - int16x8_t
+      - int32x2_t
+      - int32x4_t
+    compose:
+      - LLVMLink:
+          name: "vrhadd.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.srhadd.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vrhadds.{neon_type}"
+              arch: arm
+
+  - name: "vrhadd{neon_type.no}"
+    doc: Rounding halving add
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vrhadd.{neon_type}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [urhadd]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - uint8x16_t
+      - uint16x4_t
+      - uint16x8_t
+      - uint32x2_t
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: "vrhaddu.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.urhadd.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vrhaddu.{neon_type}"
+              arch: arm
+
+  - name: "vrndn{neon_type.no}"
+    doc: "Floating-point round to integral, to nearest with ties to even"
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrintn]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+    compose:
+      - LLVMLink:
+          name: "llvm.frintn.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.frintn.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vrintn.{neon_type}"
+              arch: arm
+
+  - name: "vqadd{neon_type.no}"
+    doc: Saturating add
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqadd.{neon_type}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uqadd]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - uint8x16_t
+      - uint16x4_t
+      - uint16x8_t
+      - uint32x2_t
+      - uint32x4_t
+      - uint64x1_t
+      - uint64x2_t
+    compose:
+      - LLVMLink:
+          name: "uqadd.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.uqadd.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.uadd.sat.{neon_type}"
+              arch: arm
+  - name: "vqadd{neon_type.no}"
+    doc: Saturating add
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqadd.{neon_type}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqadd]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int8x16_t
+      - int16x4_t
+      - int16x8_t
+      - int32x2_t
+      - int32x4_t
+      - int64x1_t
+      - int64x2_t
+    compose:
+      - LLVMLink:
+          name: "sqadd.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.sqadd.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.sadd.sat.{neon_type}"
+              arch: arm
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const f32", float32x2x2_t]
+      - ["*const f32", float32x4x2_t]
+      - ["*const f32", float32x2x3_t]
+      - ["*const f32", float32x4x3_t]
+      - ["*const f32", float32x2x4_t]
+      - ["*const f32", float32x4x4_t]
+    compose:
+      - LLVMLink:
+          name: "vld1x{neon_type[1].tuple}.{neon_type[1]}"
+          links:
+            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}"
+              arch: arm
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const i8", int8x8x2_t]
+      - ["*const i8", int8x16x2_t]
+      - ["*const i8", int8x8x3_t]
+      - ["*const i8", int8x16x3_t]
+      - ["*const i8", int8x8x4_t]
+      - ["*const i8", int8x16x4_t]
+      - ["*const i16", int16x4x2_t]
+      - ["*const i16", int16x8x2_t]
+      - ["*const i16", int16x4x3_t]
+      - ["*const i16", int16x8x3_t]
+      - ["*const i16", int16x4x4_t]
+      - ["*const i16", int16x8x4_t]
+      - ["*const i32", int32x2x2_t]
+      - ["*const i32", int32x4x2_t]
+      - ["*const i32", int32x2x3_t]
+      - ["*const i32", int32x4x3_t]
+      - ["*const i32", int32x2x4_t]
+      - ["*const i32", int32x4x4_t]
+      - ["*const i64", int64x1x2_t]
+      - ["*const i64", int64x1x3_t]
+      - ["*const i64", int64x1x4_t]
+      - ["*const i64", int64x2x2_t]
+      - ["*const i64", int64x2x3_t]
+      - ["*const i64", int64x2x4_t]
+    compose:
+      - LLVMLink:
+          name: "ld1x{neon_type[1].tuple}.{neon_type[1]}"
+          links:
+            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0i{neon_type[1].base}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0i{neon_type[1].base}"
+              arch: arm
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const u8", uint8x8x2_t, int8x8x2_t]
+      - ["*const u8", uint8x16x2_t, int8x16x2_t]
+      - ["*const u8", uint8x8x3_t, int8x8x3_t]
+      - ["*const u8", uint8x16x3_t, int8x16x3_t]
+      - ["*const u8", uint8x8x4_t, int8x8x4_t]
+      - ["*const u8", uint8x16x4_t, int8x16x4_t]
+      - ["*const u16", uint16x4x2_t, int16x4x2_t]
+      - ["*const u16", uint16x8x2_t, int16x8x2_t]
+      - ["*const u16", uint16x4x3_t, int16x4x3_t]
+      - ["*const u16", uint16x8x3_t, int16x8x3_t]
+      - ["*const u16", uint16x4x4_t, int16x4x4_t]
+      - ["*const u16", uint16x8x4_t, int16x8x4_t]
+      - ["*const u32", uint32x2x2_t, int32x2x2_t]
+      - ["*const u32", uint32x4x2_t, int32x4x2_t]
+      - ["*const u32", uint32x2x3_t, int32x2x3_t]
+      - ["*const u32", uint32x4x3_t, int32x4x3_t]
+      - ["*const u32", uint32x2x4_t, int32x2x4_t]
+      - ["*const u32", uint32x4x4_t, int32x4x4_t]
+      - ["*const u64", uint64x1x2_t, int64x1x2_t]
+      - ["*const u64", uint64x1x3_t, int64x1x3_t]
+      - ["*const u64", uint64x1x4_t, int64x1x4_t]
+      - ["*const u64", uint64x2x2_t, int64x2x2_t]
+      - ["*const u64", uint64x2x3_t, int64x2x3_t]
+      - ["*const u64", uint64x2x4_t, int64x2x4_t]
+      - ["*const p8", poly8x8x2_t, int8x8x2_t]
+      - ["*const p8", poly8x8x3_t, int8x8x3_t]
+      - ["*const p8", poly8x8x4_t, int8x8x4_t]
+      - ["*const p8", poly8x16x2_t, int8x16x2_t]
+      - ["*const p8", poly8x16x3_t, int8x16x3_t]
+      - ["*const p8", poly8x16x4_t, int8x16x4_t]
+      - ["*const p16", poly16x4x2_t, int16x4x2_t]
+      - ["*const p16", poly16x4x3_t, int16x4x3_t]
+      - ["*const p16", poly16x4x4_t, int16x4x4_t]
+      - ["*const p16", poly16x8x2_t, int16x8x2_t]
+      - ["*const p16", poly16x8x3_t, int16x8x3_t]
+      - ["*const p16", poly16x8x4_t, int16x8x4_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vld1{neon_type[2].no}"
+                - - FnCall:
+                      - transmute
+                      - - a
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-aes
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const p64", poly64x1x3_t, int64x1x3_t]
+      - ["*const p64", poly64x1x4_t, int64x1x4_t]
+      - ["*const p64", poly64x2x2_t, int64x2x2_t]
+      - ["*const p64", poly64x2x3_t, int64x2x3_t]
+      - ["*const p64", poly64x2x4_t, int64x2x4_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vld1{neon_type[2].no}"
+                - - FnCall:
+                      - transmute
+                      - - a
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["a: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-aes
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ["*const p64", poly64x1x2_t, int64x1x2_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vld1{neon_type[2].no}"
+                - - FnCall:
+                      - transmute
+                      - - a
- link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0i8" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0i8" + arch: arm + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [ld2] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8, int8x8_t] + - ["*const i16", int16x4x2_t, i16, int16x4_t] + - ["*const i32", int32x2x2_t, i32, int32x2_t] + - ["*const i8", int8x16x2_t, i8, int8x16_t] + - ["*const i16", int16x8x2_t, i16, int16x8_t] + - ["*const i32", int32x4x2_t, i32, int32x4_t] + - ["*const f32", float32x2x2_t, f32, float32x2_t] + - ["*const f32", float32x4x2_t, f32, float32x4_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64, int64x1_t] + compose: + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const {neon_type[3]}" + links: + - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].nox}" + - - "a as _" + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8x2_t] + - ["*const u16", uint16x4x2_t, int16x4x2_t] + - ["*const u32", uint32x2x2_t, int32x2x2_t] + - ["*const u8", uint8x16x2_t, int8x16x2_t] + - ["*const u16", uint16x8x2_t, int16x8x2_t] + - ["*const u32", uint32x4x2_t, int32x4x2_t] + - ["*const p8", poly8x8x2_t, int8x8x2_t] + - ["*const p16", poly16x4x2_t, int16x4x2_t] + - ["*const p8", poly8x16x2_t, int8x16x2_t] + - ["*const p16", poly16x8x2_t, int16x8x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] 
+ - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: + - cfg_attr + - - test + - FnCall: + - assert_instr + - - vld2 + - "LANE = 0" + - FnCall: + - rustc_legacy_const_generics + - - "2" + - *neon-unstable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8, int8x8_t, "3"] + - ["*const i16", int16x4x2_t, i16, int16x4_t, "2"] + - ["*const i32", int32x2x2_t, i32, int32x2_t, "1"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0i8" + arch: arm + - FnCall: + - "_vld2_lane{neon_type[1].nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8_t, "3"] + - ["*const u16", uint16x4x2_t, int16x4_t, "2"] + - ["*const u32", uint32x2x2_t, int32x2_t, "1"] + - ["*const u16", uint16x8x2_t, int16x8_t, "3"] + - ["*const u32", uint32x4x2_t, int32x4_t, "2"] + - ["*const p8", poly8x8x2_t, int8x8_t, "3"] + - ["*const p16", poly16x4x2_t, int16x4_t, "2"] + - ["*const p16", poly16x8x2_t, int16x8_t, "3"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
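+ # NOTE(review): the final column in the rows above is the width, in bits, of
+ # the compile-time lane check; an N-lane vector needs log2(N) bits, e.g. "3"
+ # for the 8-lane uint8x8x2_t row. The FnCall here expands to roughly:
+ #
+ #     static_assert_uimm_bits!(LANE, 3); // rejects LANE outside 0..=7 at compile time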
+ - - LANE + - "{type[3]}" + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: + - cfg_attr + - - test + - FnCall: + - assert_instr + - - ld2 + - "LANE = 0" + - FnCall: + - rustc_legacy_const_generics + - - "2" + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8, int8x8_t, "3"] + - ["*const i16", int16x4x2_t, i16, int16x4_t, "2"] + - ["*const i32", int32x2x2_t, i32, int32x2_t, "1"] + - ["*const i16", int16x8x2_t, i16, int16x8_t, "3"] + - ["*const i32", int32x4x2_t, i32, int32x4_t, "2"] + - ["*const f32", float32x2x2_t, f32, float32x2_t, "1"] + - ["*const f32", float32x4x2_t, f32, float32x4_t, "2"] + compose: + - FnCall: + - "static_assert_uimm_bits!" + - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i64" + - "ptr: *const i8" + links: + - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0i8" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "b.0" + - "b.1" + - "LANE as i64" + - "a as _" + + - name: "vld2{neon_type[1].lane_nox}" + doc: Load multiple 2-element structures to two registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: + - cfg_attr + - - test + - FnCall: + - assert_instr + - - vld2 + - "LANE = 0" + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-unstable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ["*const i16", int16x8x2_t, i16, int16x8_t, "3"] + - ["*const i32", int32x4x2_t, i32, int32x4_t, "2"] + - ["*const f32", float32x2x2_t, f32, float32x2_t, "1"] + - ["*const f32", float32x4x2_t, f32, float32x4_t, "2"] + compose: + - FnCall: + - "static_assert_uimm_bits!" 
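+ # NOTE(review): the arm and aarch64 lane loads bind different LLVM signatures:
+ # arm passes (ptr, vectors..., n: i32, size: i32) to llvm.arm.neon.vld2lane.*,
+ # while aarch64 passes (vectors..., n: i64, ptr) to llvm.aarch64.neon.ld2lane.*.
+ # Sketch of the arm-side extern shim for the int16x8x2_t row (illustrative):
+ #
+ #     extern "unadjusted" {
+ #         #[link_name = "llvm.arm.neon.vld2lane.v8i16.p0i8"]
+ #         fn _vld2q_lane_s16(ptr: *const i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32) -> int16x8x2_t;
+ #     }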
+ - - LANE + - "{type[4]}" + - LLVMLink: + name: "vld2.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "a: {neon_type[3]}" + - "b: {neon_type[3]}" + - "n: i32" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0i8" + arch: arm + - FnCall: + - "_vld2{neon_type[1].lane_nox}" + - - "a as _" + - "b.0" + - "b.1" + - "LANE" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0i8" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x2_t, i64] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: *const i64" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0i64" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [vld2] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8] + - ["*const i16", int16x4x2_t, i16] + - ["*const i32", int32x2x2_t, i32] + - ["*const i8", int8x16x2_t, i8] + - ["*const i16", int16x8x2_t, i16] + - ["*const i32", int32x4x2_t, i32] + - ["*const f32", float32x2x2_t, f32] + - ["*const f32", float32x4x2_t, f32] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0i8" + arch: arm + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as *const i8" + - "{neon_type[1].base_byte_size}" + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x2_t, int8x8x2_t] + - ["*const u16", uint16x4x2_t, int16x4x2_t] + - ["*const u32", uint32x2x2_t, int32x2x2_t] + - ["*const u8", uint8x16x2_t, int8x16x2_t] + - ["*const u16", uint16x8x2_t, int16x8x2_t] + - ["*const u32", uint32x4x2_t, int32x4x2_t] + - ["*const p8", poly8x8x2_t, int8x8x2_t] + - ["*const p16", poly16x4x2_t, int16x4x2_t] + - ["*const p8", poly8x16x2_t, int8x16x2_t] + - ["*const p16", poly16x8x2_t, int16x8x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - 
- a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - transmute + - - FnCall: + - "vld2{neon_type[2].dup_nox}" + - - FnCall: + - transmute + - - a + + - name: "vld2{neon_type[1].dup_nox}" + doc: Load single 2-element structure and replicate to all lanes of two registers + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - FnCall: + - cfg + - - FnCall: + - not + - - 'target_arch = "arm"' + - *neon-stable + assert_instr: [ld2r] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x2_t, i8] + - ["*const i16", int16x4x2_t, i16] + - ["*const i32", int32x2x2_t, i32] + - ["*const i8", int8x16x2_t, i8] + - ["*const i16", int16x8x2_t, i16] + - ["*const i32", int32x4x2_t, i32] + - ["*const f32", float32x2x2_t, f32] + - ["*const f32", float32x4x2_t, f32] + compose: + - LLVMLink: + name: "vld2dup.{neon_type[1]}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0{type[2]}" + arch: aarch64,arm64ec + - FnCall: + - "_vld2{neon_type[1].dup_nox}" + - - "a as _" + + - name: "vld3{neon_type[1].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, int8x8_t, i8, '3'] + - ['*const i16', int16x8x3_t, int16x8_t, i16, '3'] + - ['*const i32', int32x4x3_t, int32x4_t, i32, '2'] + - ['*const i16', int16x4x3_t, int16x4_t, i16, '2'] + - ['*const i32', int32x2x3_t, int32x2_t, i32, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld3lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - *target-not-arm + assert_instr: [ld3] + safety: + unsafe: [neon] + types: + - ['*const i8', 
int8x8x3_t, '*const int8x8_t', i8] + - ['*const i16', int16x4x3_t, '*const int16x4_t', i16] + - ['*const i32', int32x2x3_t, '*const int32x2_t', i32] + - ['*const i8', int8x16x3_t, '*const int8x16_t', i8] + - ['*const i16', int16x8x3_t, '*const int16x8_t', i16] + - ['*const i32', int32x4x3_t, '*const int32x4_t', i32] + - ['*const f32', float32x2x3_t, '*const float32x2_t', f32] + - ['*const f32', float32x4x3_t, '*const float32x4_t', f32] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0v{neon_type[1].lane}{type[3]}' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-stable + - *target-not-arm + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x3_t, '*const int64x1_t', i64] + compose: + - LLVMLink: + name: "vld3{neon_type[1].nox}" + arguments: + - 'ptr: {type[2]}' + links: + - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0v{neon_type[1].lane}{type[3]}' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [vld3] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, i8] + - ['*const i16', int16x4x3_t, i16] + - ['*const i32', int32x2x3_t, i32] + - ['*const i8', int8x16x3_t, i8] + - ['*const i16', int16x8x3_t, i16] + - ['*const i32', int32x4x3_t, i32] + - ['*const f32', float32x2x3_t, f32] + - ['*const f32', float32x4x3_t, f32] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].nox}', ['a as *const i8', '{neon_type[1].base_byte_size}']] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [nop] + types: + - ['*const i64', int64x1x3_t, i64] + safety: + unsafe: [neon] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].nox}', ['a as *const i8', '{neon_type[1].base_byte_size}']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const f32', float32x4x3_t, float32x4_t, f32, '2'] + - ['*const f32', float32x2x3_t, float32x2_t, f32, '1'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + 
- link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']] + + - name: "vld3{neon_type[2].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const f32', float32x2x3_t, float32x2_t, f32, '1', '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']] + + - name: "vld3{neon_type[2].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x3_t, int8x8_t, i8, '3', '1'] + - ['*const i16', int16x4x3_t, int16x4_t, i16, '2', '2'] + - ['*const i32', int32x2x3_t, int32x2_t, i32, '1', '4'] + - ['*const i16', int16x8x3_t, int16x8_t, i16, '3', '2'] + - ['*const i32', int32x4x3_t, int32x4_t, i32, '2', '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']] + + - name: "vld3{neon_type[2].lane_nox}" + doc: "Load multiple 3-element structures to three registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const f32', float32x4x3_t, float32x4_t, f32, '2', '4'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[4]}']] + - LLVMLink: + name: 'vld3{neon_type[1].lane_nox}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']] + + - name: "vld3{neon_type[1].lane_nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3, 'LANE = 0']]}]] + - FnCall: [cfg_attr, 
[*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x3_t, int8x8x3_t, '3'] + - ['*const u16', uint16x4x3_t, int16x4x3_t, '2'] + - ['*const u32', uint32x2x3_t, int32x2x3_t, '1'] + - ['*const p8', poly8x8x3_t, int8x8x3_t, '3'] + - ['*const u16', uint16x8x3_t, int16x8x3_t, '3'] + - ['*const p16', poly16x4x3_t, int16x4x3_t, '2'] + - ['*const p16', poly16x8x3_t, int16x8x3_t, '3'] + - ['*const u32', uint32x4x3_t, int32x4x3_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x3_t, int8x8x3_t] + - ['*const u8', uint8x16x3_t, int8x16x3_t] + - ['*const u16', uint16x4x3_t, int16x4x3_t] + - ['*const u32', uint32x2x3_t, int32x2x3_t] + - ['*const u16', uint16x8x3_t, int16x8x3_t] + - ['*const u32', uint32x4x3_t, int32x4x3_t] + - ['*const p8', poly8x8x3_t, int8x8x3_t] + - ['*const p8', poly8x16x3_t, int8x16x3_t] + - ['*const p16', poly16x4x3_t, int16x4x3_t] + - ['*const p16', poly16x8x3_t, int16x8x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: [transmute, [a]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: [transmute, [a]] + + - name: "vld3{neon_type[1].nox}" + doc: Load multiple 3-element structures to three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*target-not-arm, *neon-stable] + assert_instr: [ld3r] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x3_t, i8] + - ["*const i16", int16x4x3_t, i16] + - ["*const i32", int32x2x3_t, i32] + - ["*const i32", int32x4x3_t, i32] + - ["*const i16", int16x8x3_t, i16] + - ["*const i8", int8x16x3_t, i8] + - ["*const 
i64", int64x1x3_t, i64] + - ["*const f32", float32x4x3_t, f32] + - ["*const f32", float32x2x3_t, f32] + compose: + - LLVMLink: + name: 'ld3r{neon_type[1].dup_nox}' + arguments: + - 'ptr: {type[0]}' + links: + - link: 'llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0{type[2]}' + arch: aarch64,arm64ec + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as _']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*enable-v7, *target-is-arm, *neon-unstable] + assert_instr: [vld3] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x3_t, i8, '1'] + - ["*const i16", int16x4x3_t, i16, '2'] + - ["*const i32", int32x2x3_t, i32, '4'] + - ["*const i8", int8x16x3_t, i8, '1'] + - ["*const i16", int16x8x3_t, i16, '2'] + - ["*const i32", int32x4x3_t, i32, '4'] + - ["*const f32", float32x4x3_t, f32, '4'] + - ["*const f32", float32x2x3_t, f32, '4'] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].dup_nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x3_t, int8x8x3_t] + - ['*const u16', uint16x4x3_t, int16x4x3_t] + - ['*const u32', uint32x2x3_t, int32x2x3_t] + - ['*const u8', uint8x16x3_t, int8x16x3_t] + - ['*const u16', uint16x8x3_t, int16x8x3_t] + - ['*const u32', uint32x4x3_t, int32x4x3_t] + - ['*const p8', poly8x8x3_t, int8x8x3_t] + - ['*const p16', poly16x4x3_t, int16x4x3_t] + - ['*const p8', poly8x16x3_t, int8x16x3_t] + - ['*const p16', poly16x8x3_t, int16x8x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].dup_nox}' + - - FnCall: + - transmute + - - a + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*target-is-arm, *enable-v7, *neon-unstable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x3_t, i64, '8'] + compose: + - LLVMLink: + name: 'vld3{neon_type[1].dup_nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0i8' + arch: arm + - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].dup_nox}' + - - FnCall: + - transmute + - - a + + 
- name: "vld3{neon_type[1].dup_nox}" + doc: Load single 3-element structure and replicate to all lanes of three registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld3{neon_type[2].dup_nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - *neon-stable + assert_instr: [ld4] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, i8, '*const int8x8_t'] + - ['*const i32', int32x4x4_t, i32, '*const int32x4_t'] + - ['*const i16', int16x4x4_t, i16, '*const int16x4_t'] + - ['*const i32', int32x2x4_t, i32, '*const int32x2_t'] + - ['*const i8', int8x16x4_t, i8, '*const int8x16_t'] + - ['*const i16', int16x8x4_t, i16, '*const int16x8_t'] + - ['*const f32', float32x2x4_t, f32, '*const float32x2_t'] + - ['*const f32', float32x4x4_t, f32, '*const float32x4_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: [*target-not-arm, *neon-stable] + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x4_t, i64, '*const int64x1_t'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: {type[3]}' + links: + - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable + static_defs: + - "const LANE: i32" + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, int8x8_t, i8, '3'] + - ['*const i16', int16x4x4_t, int16x4_t, i16, '2'] + - ['*const i16', int16x8x4_t, int16x8_t, i16, '3'] + - ['*const i32', int32x2x4_t, int32x2_t, i32, '1'] + - ['*const i32', int32x4x4_t, int32x4_t, i32, '2'] + - ['*const f32', float32x2x4_t, float32x2_t, f32, '1'] + - ['*const f32', float32x4x4_t, float32x4_t, f32, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i64' + - 'ptr: *const i8' + links: + - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four 
registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [vld4] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, i8, '1'] + - ['*const i16', int16x4x4_t, i16, '2'] + - ['*const i32', int32x2x4_t, i32, '4'] + - ['*const i8', int8x16x4_t, i8, '1'] + - ['*const i16', int16x8x4_t, i16, '2'] + - ['*const i32', int32x4x4_t, i32, '4'] + - ['*const f32', float32x4x4_t, f32, '4'] + - ['*const f32', float32x2x4_t, f32, '4'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0i8' + arch: arm + - FnCall: ['_vld4{neon_type[1].nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - ['*const i64', int64x1x4_t, i64, '8'] + compose: + - LLVMLink: + name: 'vld4{neon_type[1].nox}' + arguments: + - 'ptr: *const i8' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0i8' + arch: arm + - FnCall: ['_vld4{neon_type[1].nox}', ['a as *const i8', '{type[3]}']] + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x4_t, int8x8x4_t] + - ['*const u16', uint16x4x4_t, int16x4x4_t] + - ['*const u32', uint32x2x4_t, int32x2x4_t] + - ['*const u8', uint8x16x4_t, int8x16x4_t] + - ['*const u16', uint16x8x4_t, int16x8x4_t] + - ['*const u32', uint32x4x4_t, int32x4x4_t] + - ['*const p8', poly8x8x4_t, int8x8x4_t] + - ['*const p16', poly16x4x4_t, int16x4x4_t] + - ['*const p8', poly8x16x4_t, int8x16x4_t] + - ['*const p16', poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const u64', uint64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v8 + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ['*const p64', poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - transmute + - - FnCall: + - 
'vld4{neon_type[2].nox}' + - - FnCall: + - transmute + - - a + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *enable-v7 + - *target-is-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-unstable + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['*const i8', int8x8x4_t, int8x8_t, i8, '1', '3'] + - ['*const i16', int16x4x4_t, int16x4_t, i16, '2', '2'] + - ['*const i32', int32x2x4_t, int32x2_t, i32, '4', '1'] + - ['*const i16', int16x8x4_t, int16x8_t, i16, '2', '3'] + - ['*const i32', int32x4x4_t, int32x4_t, i32, '4', '2'] + - ['*const f32', float32x2x4_t, float32x2_t, f32, '4', '1'] + - ['*const f32', float32x4x4_t, float32x4_t, f32, '4', '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[5]}']] + - LLVMLink: + name: 'ld4lane.{neon_type[2]}' + arguments: + - 'ptr: *const i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[3]}.p0i8' + arch: arm + - FnCall: ['_vld4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', LANE, '{type[4]}']] + + - name: "vld4{neon_type[1].lane_nox}" + doc: Load multiple 4-element structures to four registers + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld4, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld4, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['*const u8', uint8x8x4_t, int8x8x4_t, '3'] + - ['*const u16', uint16x4x4_t, int16x4x4_t, '2'] + - ['*const u32', uint32x2x4_t, int32x2x4_t, '1'] + - ['*const u16', uint16x8x4_t, int16x8x4_t, '3'] + - ['*const u32', uint32x4x4_t, int32x4x4_t, '2'] + - ['*const p8', poly8x8x4_t, int8x8x4_t, '3'] + - ['*const p16', poly16x4x4_t, int16x4x4_t, '2'] + - ['*const p16', poly16x8x4_t, int16x8x4_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, ['LANE', '{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vld4{neon_type[2].lane_nox}::' + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + 
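+ # NOTE(review): the vst1 lane entries here compose to a plain scalar write
+ # rather than a store instruction, which is why they assert `nop`. Rough shape
+ # of the generated function for the int64x1_t row (illustrative sketch):
+ #
+ #     pub unsafe fn vst1_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1_t) {
+ #         static_assert!(LANE == 0); // a one-lane vector only has lane 0
+ #         *a = simd_extract!(b, LANE as u32);
+ #     }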
attr: + - *neon-v8 + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + types: + - ['*mut p64', poly64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v8 + - FnCall: + - target_feature + - - 'enable = "neon,aes"' + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + types: + - ['*mut p64', poly64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: "vst1{neon_type[1].lane_nox}" + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ["2"]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + types: + - ['*mut i8', int8x8_t, '3'] + - ['*mut i16', int16x4_t, '2'] + - ['*mut i32', int32x2_t, '1'] + - ['*mut i8', int8x16_t, '4'] + - ['*mut i16', int16x8_t, '3'] + - ['*mut i32', int32x4_t, '2'] + - ['*mut i64', int64x2_t, '1'] + - ['*mut u8', uint8x8_t, '3'] + - ['*mut u16', uint16x4_t, '2'] + - ['*mut u32', uint32x2_t, '1'] + - ['*mut u8', uint8x16_t, '4'] + - ['*mut u16', uint16x8_t, '3'] + - ['*mut u32', uint32x4_t, '2'] + - ['*mut u64', uint64x2_t, '1'] + - ['*mut p8', poly8x8_t, '3'] + - ['*mut p16', poly16x4_t, '2'] + - ['*mut p8', poly8x16_t, '4'] + - ['*mut p16', poly16x8_t, '3'] + - ['*mut f32', float32x2_t, '1'] + - ['*mut f32', float32x4_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - Assign: + - "*a" + - FnCall: [simd_extract!, [b, 'LANE as u32']] + - Identifier: [';', Symbol] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st1] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i64, int64x1x2_t, int64x1_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [i64, int64x2x2_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut {type[0]}' + links: + - link: 'llvm.aarch64.neon.st1x2.v{neon_type[1].lane}{type[0]}.p0{type[0]}' + arch: aarch64,arm64ec 
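+ # NOTE(review): the aarch64 st1x2 link takes the two vectors first and the
+ # destination pointer last, hence the argument order in the FnCall below. The
+ # x2/x3/x4 stores write the tuple's registers to consecutive memory; usage
+ # sketch (assumed buffers):
+ #
+ #     let src = [7i8; 16];
+ #     let mut out = [0i8; 16];
+ #     unsafe { vst1_s8_x2(out.as_mut_ptr(), vld1_s8_x2(src.as_ptr())) };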
+ - FnCall: ['_vst1{neon_type[1].no}', ['b.0', 'b.1', 'a']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st1] + types: + - [i8, int8x8x3_t, int8x8_t] + - [i16, int16x4x3_t, int16x4_t] + - [i32, int32x2x3_t, int32x2_t] + - [i64, int64x1x3_t, int64x1_t] + - [i8, int8x16x3_t, int8x16_t] + - [i16, int16x8x3_t, int16x8_t] + - [i32, int32x4x3_t, int32x4_t] + - [i64, int64x2x3_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut {type[0]}' + links: + - link: 'llvm.aarch64.neon.st1x3.v{neon_type[1].lane}{type[0]}.p0{type[0]}' + arch: aarch64,arm64ec + - FnCall: ['_vst1{neon_type[1].no}', ['b.0', 'b.1', 'b.2', 'a']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st1] + types: + - [i8, int8x8x4_t, int8x8_t] + - [i16, int16x4x4_t, int16x4_t] + - [i32, int32x2x4_t, int32x2_t] + - [i64, int64x1x4_t, int64x1_t] + - [i8, int8x16x4_t, int8x16_t] + - [i16, int16x8x4_t, int16x8_t] + - [i32, int32x4x4_t, int32x4_t] + - [i64, int64x2x4_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut {type[0]}' + links: + - link: 'llvm.aarch64.neon.st1x4.v{neon_type[1].lane}{type[0]}.p0{type[0]}' + arch: aarch64,arm64ec + - FnCall: ['_vst1{neon_type[1].no}', ['b.0', 'b.1', 'b.2', 'b.3', 'a']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [vst1] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i64, int64x1x2_t, int64x1_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [i64, int64x2x2_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x2.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x2.p0{type[0]}.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *enable-v7 + - *target-is-arm + - *neon-unstable + assert_instr: [vst1] + types: + - [i8, int8x8x3_t, int8x8_t] + - [i16, int16x4x3_t, int16x4_t] + - [i32, int32x2x3_t, int32x2_t] + - [i64, int64x1x3_t, int64x1_t] + - [i8, int8x16x3_t, int8x16_t] + - [i16, int16x8x3_t, int16x8_t] + - [i32, int32x4x3_t, int32x4_t] + - [i64, int64x2x3_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x3.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x3.p0{type[0]}.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: 
['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures from one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [vst1] + types: + - [i8, int8x8x4_t, int8x8_t] + - [i16, int16x4x4_t, int16x4_t] + - [i32, int32x2x4_t, int32x2_t] + - [i64, int64x1x4_t, int64x1_t] + - [i8, int8x16x4_t, int8x16_t] + - [i16, int16x8x4_t, int16x8_t] + - [i32, int32x4x4_t, int32x4_t] + - [i64, int64x2x4_t, int64x2_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0{type[0]}.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + + - name: 'vst1{neon_type[1].no}' + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + safety: + unsafe: [neon] + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [vst1] + types: + - [f32, float32x2x4_t, float32x2_t] + - [f32, float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: 'st1x4.{neon_type[1]}' + arguments: + - 'ptr: *mut {type[0]}' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + links: + - link: 'llvm.arm.neon.vst1x4.p0{type[0]}.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v8 + - *neon-aes + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [p64, poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x2_t, int64x1_t] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [u64, uint64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" 
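+ # NOTE(review): vst2 stores with 2-way interleaving: memory receives
+ # b.0[0], b.1[0], b.0[1], b.1[1], ... Usage sketch (assumed values, for
+ # illustration only):
+ #
+ #     let pair = int32x2x2_t(vdup_n_s32(1), vdup_n_s32(2));
+ #     let mut out = [0i32; 4];
+ #     unsafe { vst2_s32(out.as_mut_ptr(), pair) };
+ #     assert_eq!(out, [1, 2, 1, 2]);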
+ arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x2_t, int64x1_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [st2] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, int8x8_t] + - [i16, int16x4x2_t, int16x4_t] + - [i32, int32x2x2_t, int32x2_t] + - [i8, int8x16x2_t, int8x16_t] + - [i16, int16x8x2_t, int16x8_t] + - [i32, int32x4x2_t, int32x4_t] + - [f32, float32x2x2_t, float32x2_t] + - [f32, float32x4x2_t, float32x4_t] + compose: + - LLVMLink: + name: 'st2.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst2]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st2]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [u8, uint8x8x2_t, int8x8x2_t] + - [u16, uint16x4x2_t, int16x4x2_t] + - [u32, uint32x2x2_t, int32x2x2_t] + - [u8, uint8x16x2_t, int8x16x2_t] + - [u16, uint16x8x2_t, int16x8x2_t] + - [u32, uint32x4x2_t, int32x4x2_t] + - [p8, poly8x8x2_t, int8x8x2_t] + - [p16, poly16x4x2_t, int16x4x2_t] + - [p8, poly8x16x2_t, int8x16x2_t] + - [p16, poly16x8x2_t, int16x8x2_t] + compose: + - FnCall: + - "vst2{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, '3', int8x8_t] + - [i16, int16x4x2_t, '2', int16x4_t] + - [i32, int32x2x2_t, '1', int32x2_t] + - [i16, int16x8x2_t, '3', int16x8_t] + - [i32, int32x4x2_t, '2', int32x4_t] + - [f32, float32x2x2_t, '1', float32x2_t] + - [f32, float32x4x2_t, '2', float32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st2lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst2{neon_type[1].lane_nox}', ['b.0', 'b.1', 'LANE as i64', 'a as _']] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst2, 'LANE = 
0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable-not-arm + - *neon-unstable-is-arm + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [u8, uint8x8x2_t, int8x8x2_t, '3'] + - [u16, uint16x4x2_t, int16x4x2_t, '2'] + - [u32, uint32x2x2_t, int32x2x2_t, '1'] + - [u16, uint16x8x2_t, int16x8x2_t, '3'] + - [u32, uint32x4x2_t, int32x4x2_t, '2'] + - [p8, poly8x8x2_t, int8x8x2_t, '3'] + - [p16, poly16x4x2_t, int16x4x2_t, '2'] + - [p16, poly16x8x2_t, int16x8x2_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst2{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst2{neon_type[1].nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [vst2] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, int8x8_t, '1'] + - [i16, int16x4x2_t, int16x4_t, '2'] + - [i32, int32x2x2_t, int32x2_t, '4'] + - [i8, int8x16x2_t, int8x16_t, '1'] + - [i16, int16x8x2_t, int16x8_t, '2'] + - [i32, int32x4x2_t, int32x4_t, '4'] + - [f32, float32x2x2_t, float32x2_t, '4'] + - [f32, float32x4x2_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: 'vst2.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]] + + - name: "vst2{neon_type[1].lane_nox}" + doc: "Store multiple 2-element structures from two registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x2_t, '3', int8x8_t, '1'] + - [i16, int16x4x2_t, '2', int16x4_t, '2'] + - [i32, int32x2x2_t, '1', int32x2_t, '4'] + - [i16, int16x8x2_t, '3', int16x8_t, '2'] + - [i32, int32x4x2_t, '2', int32x4_t, '4'] + - [f32, float32x4x2_t, '2', float32x4_t, '4'] + - [f32, float32x2x2_t, '1', float32x2_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst2lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst2lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - *neon-stable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x3_t, int64x1_t] + compose: + - LLVMLink: + name: 'st3.{neon_type[1].nox}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: 
["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v8 + - *neon-aes + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [p64, poly64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [nop] + safety: + unsafe: [neon] + types: + - [i64, int64x1x3_t, int64x1_t] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [u64, uint64x1x3_t, int64x1x3_t] + compose: + - FnCall: + - "vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-stable-not-arm + - *neon-unstable-is-arm + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [u8, uint8x8x3_t, int8x8x3_t, '3'] + - [u16, uint16x4x3_t, int16x4x3_t, '2'] + - [u32, uint32x2x3_t, int32x2x3_t, '1'] + - [u16, uint16x8x3_t, int16x8x3_t, '3'] + - [u32, uint32x4x3_t, int32x4x3_t, '2'] + - [p8, poly8x8x3_t, int8x8x3_t, '3'] + - [p16, poly16x4x3_t, int16x4x3_t, '2'] + - [p16, poly16x8x3_t, int16x8x3_t, '3'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vst3{neon_type[2].lane_nox}::" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst3]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st3]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [u8, uint8x8x3_t, int8x8x3_t] + - [u16, uint16x4x3_t, int16x4x3_t] + - [u32, uint32x2x3_t, int32x2x3_t] + - [u8, uint8x16x3_t, int8x16x3_t] + - [u16, uint16x8x3_t, int16x8x3_t] + - [u32, uint32x4x3_t, int32x4x3_t] + - [p8, poly8x8x3_t, int8x8x3_t] + - [p16, poly16x4x3_t, int16x4x3_t] + - [p8, poly8x16x3_t, int8x16x3_t] + - [p16, poly16x8x3_t, int16x8x3_t] + compose: + - FnCall: + - 
"vst3{neon_type[2].nox}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - *neon-unstable + assert_instr: [vst3] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, int8x8_t, '1'] + - [i16, int16x4x3_t, int16x4_t, '2'] + - [i32, int32x2x3_t, int32x2_t, '4'] + - [i8, int8x16x3_t, int8x16_t, '1'] + - [i16, int16x8x3_t, int16x8_t, '2'] + - [i32, int32x4x3_t, int32x4_t, '4'] + - [f32, float32x2x3_t, float32x2_t, '4'] + - [f32, float32x4x3_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - *neon-unstable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, '3', int8x8_t, '1'] + - [i16, int16x4x3_t, '2', int16x4_t, '2'] + - [i32, int32x2x3_t, '1', int32x2_t, '4'] + - [i16, int16x8x3_t, '3', int16x8_t, '2'] + - [i32, int32x4x3_t, '2', int32x4_t, '4'] + - [f32, float32x2x3_t, '1', float32x2_t, '4'] + - [f32, float32x4x3_t, '2', float32x4_t, '4'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst3lane.{neon_type[1]}' + arguments: + - 'ptr: *mut i8' + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'n: i32' + - 'size: i32' + links: + - link: 'llvm.arm.neon.vst3lane.p0i8.v{neon_type[1].lane}{type[0]}' + arch: arm + - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', "{type[4]}"]] + + - name: "vst3{neon_type[1].nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*target-not-arm, *neon-stable] + assert_instr: [st3] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, int8x8_t] + - [i16, int16x4x3_t, int16x4_t] + - [i32, int32x2x3_t, int32x2_t] + - [i8, int8x16x3_t, int8x16_t] + - [i16, int16x8x3_t, int16x8_t] + - [i32, int32x4x3_t, int32x4_t] + - [f32, float32x2x3_t, float32x2_t] + - [f32, float32x4x3_t, float32x4_t] + compose: + - LLVMLink: + name: 'vst3.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + + - name: "vst3{neon_type[1].lane_nox}" + doc: "Store multiple 3-element structures from three registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x3_t, '3', int8x8_t] + - [i16, int16x4x3_t, '2', int16x4_t] + - [i32, int32x2x3_t, '1', int32x2_t] + - [i16, 
+
+  - name: "vst3{neon_type[1].lane_nox}"
+    doc: "Store multiple 3-element structures from three registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *target-not-arm
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]]
+      - *neon-stable
+    static_defs: ['const LANE: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [i8, int8x8x3_t, '3', int8x8_t]
+      - [i16, int16x4x3_t, '2', int16x4_t]
+      - [i32, int32x2x3_t, '1', int32x2_t]
+      - [i16, int16x8x3_t, '3', int16x8_t]
+      - [i32, int32x4x3_t, '2', int32x4_t]
+      - [f32, float32x2x3_t, '1', float32x2_t]
+      - [f32, float32x4x3_t, '2', float32x4_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
+      - LLVMLink:
+          name: 'vst3.{neon_type[1].lane_nox}'
+          arguments:
+            - 'a: {type[3]}'
+            - 'b: {type[3]}'
+            - 'c: {type[3]}'
+            - 'n: i64'
+            - 'ptr: *mut i8'
+          links:
+            - link: 'llvm.aarch64.neon.st3lane.v{neon_type[1].lane}{type[0]}.p0i8'
+              arch: aarch64,arm64ec
+      - FnCall: ['_vst3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']]
+
+  - name: "vst4{neon_type[1].nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *neon-v8
+      - *neon-aes
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [p64, poly64x1x4_t, int64x1x4_t]
+    compose:
+      - FnCall:
+          - "vst4{neon_type[2].nox}"
+          - - FnCall: [transmute, [a]]
+            - FnCall: [transmute, [b]]
+
+  - name: "vst4{neon_type[1].nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *target-is-arm
+      - *enable-v7
+      - *neon-unstable
+    assert_instr: [nop]
+    safety:
+      unsafe: [neon]
+    types:
+      - [i64, int64x1x4_t, int64x1_t]
+    compose:
+      - LLVMLink:
+          name: 'vst4.{neon_type[1]}'
+          arguments:
+            - 'ptr: *mut i8'
+            - 'a: {type[2]}'
+            - 'b: {type[2]}'
+            - 'c: {type[2]}'
+            - 'd: {type[2]}'
+            - 'size: i32'
+          links:
+            - link: 'llvm.arm.neon.vst4.p0i8.v{neon_type[1].lane}{type[0]}'
+              arch: arm
+      - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']]
+
+  - name: "vst4{neon_type[1].nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *target-not-arm
+      - *neon-stable
+    assert_instr: [nop]
+    safety:
+      unsafe: [neon]
+    types:
+      - [i64, int64x1x4_t, int64x1_t]
+    compose:
+      - LLVMLink:
+          name: 'vst4.{neon_type[1]}'
+          arguments:
+            - 'a: {type[2]}'
+            - 'b: {type[2]}'
+            - 'c: {type[2]}'
+            - 'd: {type[2]}'
+            - 'ptr: *mut i8'
+          links:
+            - link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0i8'
+              arch: aarch64,arm64ec
+      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+
+  - name: "vst4{neon_type[1].nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [u64, uint64x1x4_t, int64x1x4_t]
+    compose:
+      - FnCall:
+          - "vst4{neon_type[2].nox}"
+          - - FnCall: [transmute, [a]]
+            - FnCall: [transmute, [b]]
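+
+  # The trailing '8' handed to the ARM binding above is its `size` operand,
+  # which (on a plausible reading of the vstN intrinsics) is the element
+  # alignment in bytes: 8 for i64 here, with '1'/'2'/'4' threaded through as
+  # {type[3]} for the i8/i16/i32-and-f32 entries elsewhere. Sketch:
+  #
+  #   _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8); // 8 = align of an i64 element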
+
+  - name: "vst4{neon_type[1].lane_nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [u8, uint8x8x4_t, int8x8x4_t, '3']
+      - [u16, uint16x4x4_t, int16x4x4_t, '2']
+      - [u32, uint32x2x4_t, int32x2x4_t, '1']
+      - [u16, uint16x8x4_t, int16x8x4_t, '3']
+      - [u32, uint32x4x4_t, int32x4x4_t, '2']
+      - [p8, poly8x8x4_t, int8x8x4_t, '3']
+      - [p16, poly16x4x4_t, int16x4x4_t, '2']
+      - [p16, poly16x8x4_t, int16x8x4_t, '3']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
+      - FnCall:
+          - "vst4{neon_type[2].lane_nox}::<LANE>"
+          - - FnCall: [transmute, [a]]
+            - FnCall: [transmute, [b]]
+
+  - name: "vst4{neon_type[1].nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vst4]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [st4]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [u8, uint8x8x4_t, int8x8x4_t]
+      - [u16, uint16x4x4_t, int16x4x4_t]
+      - [u32, uint32x2x4_t, int32x2x4_t]
+      - [u8, uint8x16x4_t, int8x16x4_t]
+      - [u16, uint16x8x4_t, int16x8x4_t]
+      - [u32, uint32x4x4_t, int32x4x4_t]
+      - [p8, poly8x8x4_t, int8x8x4_t]
+      - [p16, poly16x4x4_t, int16x4x4_t]
+      - [p8, poly8x16x4_t, int8x16x4_t]
+      - [p16, poly16x8x4_t, int16x8x4_t]
+    compose:
+      - FnCall:
+          - "vst4{neon_type[2].nox}"
+          - - FnCall: [transmute, [a]]
+            - FnCall: [transmute, [b]]
+
+  - name: "vst4{neon_type[1].nox}"
+    doc: "Store multiple 4-element structures from four registers"
+    arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
+    attr:
+      - *target-is-arm
+      - *enable-v7
+      - *neon-unstable
+    assert_instr: [vst4]
+    safety:
+      unsafe: [neon]
+    types:
+      - [i8, int8x8x4_t, int8x8_t, '1']
+      - [i16, int16x4x4_t, int16x4_t, '2']
+      - [i32, int32x2x4_t, int32x2_t, '4']
+      - [i8, int8x16x4_t, int8x16_t, '1']
+      - [i16, int16x8x4_t, int16x8_t, '2']
+      - [i32, int32x4x4_t, int32x4_t, '4']
+      - [f32, float32x2x4_t, float32x2_t, '4']
+      - [f32, float32x4x4_t, float32x4_t, '4']
+    compose:
+      - LLVMLink:
+          name: 'vst4.{neon_type[1]}'
+          arguments:
+            - 'ptr: *mut i8'
+            - 'a: {type[2]}'
+            - 'b: {type[2]}'
+            - 'c: {type[2]}'
+            - 'd: {type[2]}'
+            - 'size: i32'
+          links:
+            - link: 'llvm.arm.neon.vst4.p0i8.v{neon_type[1].lane}{type[0]}'
+              arch: arm
+      - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', "{type[3]}"]]
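+
+  # static_assert_uimm_bits!(LANE, BITS) rejects out-of-range lane indices at
+  # compile time; BITS is sized so that 2^BITS equals the lane count, hence
+  # '3' for 8-lane, '2' for 4-lane and '1' for 2-lane rows above. E.g.:
+  #
+  #   static_assert_uimm_bits!(LANE, 3); // accepts LANE in 0..=7 only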
"{type[4]}"]] + + - name: "vst4{neon_type[1].nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: [*target-not-arm, *neon-stable] + assert_instr: [st4] + safety: + unsafe: [neon] + types: + - [i8, int8x8x4_t, int8x8_t] + - [i16, int16x4x4_t, int16x4_t] + - [i32, int32x2x4_t, int32x2_t] + - [i8, int8x16x4_t, int8x16_t] + - [i16, int16x8x4_t, int16x8_t] + - [i32, int32x4x4_t, int32x4_t] + - [f32, float32x2x4_t, float32x2_t] + - [f32, float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: 'vst4.{neon_type[1]}' + arguments: + - 'a: {type[2]}' + - 'b: {type[2]}' + - 'c: {type[2]}' + - 'd: {type[2]}' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + + - name: "vst4{neon_type[1].lane_nox}" + doc: "Store multiple 4-element structures from four registers" + arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"] + attr: + - *target-not-arm + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]] + - *neon-stable + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [i8, int8x8x4_t, '3', int8x8_t] + - [i16, int16x4x4_t, '2', int16x4_t] + - [i32, int32x2x4_t, '1', int32x2_t] + - [i16, int16x8x4_t, '3', int16x8_t] + - [i32, int32x4x4_t, '2', int32x4_t] + - [f32, float32x2x4_t, '1', float32x2_t] + - [f32, float32x4x4_t, '2', float32x4_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - LLVMLink: + name: 'vst4.{neon_type[1].lane_nox}' + arguments: + - 'a: {type[3]}' + - 'b: {type[3]}' + - 'c: {type[3]}' + - 'd: {type[3]}' + - 'n: i64' + - 'ptr: *mut i8' + links: + - link: 'llvm.aarch64.neon.st4lane.v{neon_type[1].lane}{type[0]}.p0i8' + arch: aarch64,arm64ec + - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']] + + - name: "vusdot{neon_type[0].no}" + doc: "Dot product vector form with unsigned and signed integers" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot]]}]] + - *neon-unstable-i8mm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int32x2_t, uint8x8_t, int8x8_t] + - [int32x4_t, uint8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "usdot.{neon_type[0]}" + links: + - link: "llvm.aarch64.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8" + arch: arm + + - name: "vusdot{type[0]}" + doc: "Dot product index form with unsigned and signed integers" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"] + return_type: "{neon_type[1]}" + attr: + - *neon-i8mm + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - *neon-unstable-i8mm + - *neon-unstable-is-arm + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]'] + - ['q_lane_s32', 
+
+  - name: "vusdot{type[0]}"
+    doc: "Dot product index form with unsigned and signed integers"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-i8mm
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - *neon-unstable-i8mm
+      - *neon-unstable-is-arm
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]']
+      - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
+      - Let:
+          - c
+          - int32x2_t
+          - FnCall: [transmute, [c]]
+      - Let:
+          - c
+          - "{type[1]}"
+          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+      - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]]
+
+  - name: "vsudot{neon_type[0].lane_nox}"
+    doc: "Dot product index form with signed and unsigned integers"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-i8mm
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - *neon-unstable-i8mm
+      - *neon-unstable-is-arm
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t]
+      - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
+      - Let:
+          - c
+          - uint32x2_t
+          - FnCall: [transmute, [c]]
+      - Let:
+          - c
+          - "{type[4]}"
+          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+      - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]]
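+
+  # Both index forms broadcast one 32-bit group of four bytes from `c`:
+  # reinterpret the bytes as 32-bit lanes, splat the chosen lane with
+  # simd_shuffle!, then transmute back. Rough Rust equivalent of the
+  # '_lane_s32' row (sketch only):
+  #
+  #   let c: int32x2_t = transmute(c);
+  #   let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+  #   vusdot_s32(a, b, transmute(c))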
+
+  - name: "vmul{neon_type[1].no}"
+    doc: Multiply
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul{type[0]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ['.i8', int8x8_t]
+      - ['.i8', int8x16_t]
+      - ['.i16', int16x4_t]
+      - ['.i16', int16x8_t]
+      - ['.i32', int32x2_t]
+      - ['.i32', int32x4_t]
+      - ['.i8', uint8x8_t]
+      - ['.i8', uint8x16_t]
+      - ['.i16', uint16x4_t]
+      - ['.i16', uint16x8_t]
+      - ['.i32', uint32x2_t]
+      - ['.i32', uint32x4_t]
+    compose:
+      - FnCall: [simd_mul, [a, b]]
+
+  - name: "vmul{neon_type[1].no}"
+    doc: Multiply
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul.{type[0]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [f32, float32x2_t]
+      - [f32, float32x4_t]
+    compose:
+      - FnCall: [simd_mul, [a, b]]
+
+  - name: "vmul{neon_type[0].lane_nox}"
+    doc: Multiply
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul, 'LANE = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
+      - [int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+    compose:
+      - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
+      - FnCall:
+          - simd_mul
+          - - a
+            - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
+
+  - name: "vmul{neon_type[0].laneq_nox}"
+    doc: Multiply
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmul, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [mul, 'LANE = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
+      - [int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+    compose:
+      - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
+      - FnCall:
+          - simd_mul
+          - - a
+            - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
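+
+  # The lane/laneq multiplies need no dedicated lowering: the selected lane
+  # of `b` is splatted across a full vector by simd_shuffle! and fed to the
+  # plain element-wise multiply. Sketch for the int16x4_t row (assumed
+  # rendering):
+  #
+  #   static_assert_uimm_bits!(LANE, 2);
+  #   simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))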
"vmull{neon_type[1].no}" + doc: "Polynomial multiply long" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmull.{type[0]}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [pmull]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - ["p8", poly8x8_t, poly16x8_t, int8x8_t] + compose: + - LLVMLink: + name: "pmull.{neon_type[1].no}" + links: + - link: "llvm.aarch64.neon.pmull.{neon_type[3]}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vmullp.{neon_type[3]}" + arch: arm + + - name: "vmull_n{neon_type[0].no}" + doc: Vector long multiply with scalar + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vmull"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smull]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16", int32x4_t] + - [int32x2_t, "i32", int64x2_t] + compose: + - FnCall: + - "vmull{neon_type[0].no}" + - - a + - FnCall: + - "vdup_n{neon_type[0].no}" + - - b + + - name: "vmull_n{neon_type[0].no}" + doc: Vector long multiply with scalar + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[2]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vmull"]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umull]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [uint16x4_t, "u16", uint32x4_t] + - [uint32x2_t, "u32", uint64x2_t] + compose: + - FnCall: + - "vmull{neon_type[0].no}" + - - a + - FnCall: + - "vdup_n{neon_type[0].no}" + - - b + + - name: "vfma{neon_type.no}" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - vfma + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - FnCall: + - any + - - 'target_arch = "aarch64"' + - 'target_arch = "arm64ec"' + - FnCall: + - assert_instr + - - fmla + - FnCall: + - cfg_attr + - - FnCall: + - not + - - 'target_arch = "arm"' + - FnCall: + - stable + - - 'feature = "neon_intrinsics"' + - 'since = "1.59.0"' + - FnCall: + - cfg_attr + - - 'target_arch = "arm"' + - FnCall: + - unstable + - - 'feature = "stdarch_arm_neon_intrinsics"' + - 'issue = "111800"' + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fma.{neon_type}" + links: + - link: "llvm.fma.{neon_type}" + arch: aarch64 + - link: "llvm.fma.{neon_type}" + arch: arm + - FnCall: ["_vfma{neon_type.no}", [b, c, a]] + + - name: "vfma{neon_type[0].N}" + doc: Floating-point fused Multiply-Add to accumulator(vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: + - cfg_attr + - - FnCall: + - all + - - test + - 'target_arch = "arm"' + - FnCall: + - assert_instr + - - vfma + - FnCall: + - cfg_attr + - - 
+
+  - name: "vfma{neon_type.no}"
+    doc: Floating-point fused Multiply-Add to accumulator (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]]
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - vfma
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - fmla
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+    compose:
+      - LLVMLink:
+          name: "fma.{neon_type}"
+          links:
+            - link: "llvm.fma.{neon_type}"
+              arch: aarch64
+            - link: "llvm.fma.{neon_type}"
+              arch: arm
+      - FnCall: ["_vfma{neon_type.no}", [b, c, a]]
+
+  - name: "vfma{neon_type[0].N}"
+    doc: Floating-point fused Multiply-Add to accumulator (vector)
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]]
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - vfma
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - fmla
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x2_t, f32]
+      - [float32x4_t, f32]
+    compose:
+      - FnCall:
+          - "vfma{neon_type[0].no}"
+          - - a
+            - b
+            - FnCall:
+                - "vdup{neon_type[0].N}_vfp4"
+                - - c
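+
+  # vfma(a, b, c) computes a + b * c with `a` as the accumulator, while
+  # llvm.fma(x, y, z) computes x * y + z; that is why the compose step calls
+  # _vfma(b, c, a) rather than passing the arguments through in order:
+  #
+  #   _vfma_f32(b, c, a) // == b * c + a  (sketch of the generated call)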
+
+  - name: "vsub{neon_type[1].no}"
+    doc: "Subtract"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub{type[0]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sub]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ['.i8', int8x8_t]
+      - ['.i8', int8x16_t]
+      - ['.i16', int16x4_t]
+      - ['.i16', int16x8_t]
+      - ['.i32', int32x2_t]
+      - ['.i32', int32x4_t]
+      - ['.i8', uint8x8_t]
+      - ['.i8', uint8x16_t]
+      - ['.i16', uint16x4_t]
+      - ['.i16', uint16x8_t]
+      - ['.i32', uint32x2_t]
+      - ['.i32', uint32x4_t]
+      - ['.i64', int64x1_t]
+      - ['.i64', int64x2_t]
+      - ['.i64', uint64x1_t]
+      - ['.i64', uint64x2_t]
+    compose:
+      - FnCall: [simd_sub, [a, b]]
+
+  - name: "vsub{neon_type[1].no}"
+    doc: "Subtract"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub.{type[0]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ['f32', float32x2_t]
+      - ['f32', float32x4_t]
+    compose:
+      - FnCall: [simd_sub, [a, b]]
+
+  - name: "vadd{neon_type.no}"
+    doc: Bitwise exclusive OR
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - target_feature
+                - - 'enable = "v7"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - nop
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - nop
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - poly8x8_t
+      - poly16x4_t
+      - poly8x16_t
+      - poly16x8_t
+      - poly64x1_t
+      - poly64x2_t
+    compose:
+      - FnCall:
+          - simd_xor
+          - - a
+            - b
+
+  - name: "vaddq_{type}"
+    doc: Bitwise exclusive OR
+    arguments: ["a: {type}", "b: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - target_feature
+                - - 'enable = "v7"'
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - 'target_arch = "arm"'
+            - FnCall:
+                - assert_instr
+                - - nop
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - all
+                - - test
+                  - FnCall:
+                      - any
+                      - - 'target_arch = "aarch64"'
+                        - 'target_arch = "arm64ec"'
+            - FnCall:
+                - assert_instr
+                - - nop
+      - FnCall:
+          - cfg_attr
+          - - FnCall:
+                - not
+                - - 'target_arch = "arm"'
+            - FnCall:
+                - stable
+                - - 'feature = "neon_intrinsics"'
+                  - 'since = "1.59.0"'
+      - FnCall:
+          - cfg_attr
+          - - 'target_arch = "arm"'
+            - FnCall:
+                - unstable
+                - - 'feature = "stdarch_arm_neon_intrinsics"'
+                  - 'issue = "111800"'
+    safety:
+      unsafe: [neon]
+    types:
+      - p128
+    compose:
+      - Xor:
+          - a
+          - b
+
+  - name: "vsubhn{neon_type[0].noq}"
+    doc: Subtract returning high narrow
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x8_t, int8x8_t, 'i16x8', 'i16x8::new(8, 8, 8, 8, 8, 8, 8, 8)']
+      - [int32x4_t, int16x4_t, 'i32x4', 'i32x4::new(16, 16, 16, 16)']
+      - [int64x2_t, int32x2_t, 'i64x2', 'i64x2::new(32, 32)']
+      - [uint16x8_t, uint8x8_t, 'u16x8', 'u16x8::new(8, 8, 8, 8, 8, 8, 8, 8)']
+      - [uint32x4_t, uint16x4_t, 'u32x4', 'u32x4::new(16, 16, 16, 16)']
+      - [uint64x2_t, uint32x2_t, 'u64x2', 'u64x2::new(32, 32)']
+    compose:
+      - Let: [c, "{type[2]}", "{type[3]}"]
+      - FnCall:
+          - simd_cast
+          - - FnCall:
+                - simd_shr
+                - - FnCall: [simd_sub, [a, b]]
+                  - FnCall: [transmute, [c]]
+
+  - name: "vsubhn_high{neon_type[1].noq}"
+    doc: Subtract returning high narrow
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn2]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+    compose:
+      - Let:
+          - d
+          - "{neon_type[0]}"
+          - FnCall: ["vsubhn{neon_type[1].noq}", [b, c]]
+      - FnCall: [simd_shuffle!, [a, d, "{type[3]}"]]
+
+  - name: "vhsub{neon_type[1].no}"
+    doc: "Unsigned halving subtract"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vhsub.{type[0]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uhsub]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ['u8', uint8x8_t]
+      - ['u8', uint8x16_t]
+      - ['u16', uint16x4_t]
+      - ['u16', uint16x8_t]
+      - ['u32', uint32x2_t]
+      - ['u32', uint32x4_t]
+    compose:
+      - LLVMLink:
+          name: "uhsub.{neon_type[1].no}"
+          links:
+            - link: "llvm.aarch64.neon.uhsub.{neon_type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vhsubu.{neon_type[1]}"
+              arch: arm
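+
+  # vsubhn keeps the *high* half of each wide difference: subtract, shift
+  # right by half the element width (the splatted 8/16/32 constant built in
+  # the Let above), then narrow with simd_cast. For one i16 lane (sketch):
+  #
+  #   // a - b == 0x1234_i16  ->  ((0x1234 >> 8) as i8) == 0x12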
+
+  - name: "vhsub{neon_type[1].no}"
+    doc: "Signed halving subtract"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vhsub.{type[0]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [shsub]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - ['s8', int8x8_t]
+      - ['s8', int8x16_t]
+      - ['s16', int16x4_t]
+      - ['s16', int16x8_t]
+      - ['s32', int32x2_t]
+      - ['s32', int32x4_t]
+    compose:
+      - LLVMLink:
+          name: "shsub.{neon_type[1].no}"
+          links:
+            - link: "llvm.aarch64.neon.shsub.{neon_type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vhsubs.{neon_type[1]}"
+              arch: arm
+
+  - name: "vsubw{neon_type[1].noq}"
+    doc: Signed Subtract Wide
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubw]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ssubw]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x8_t, int8x8_t]
+      - [int32x4_t, int16x4_t]
+      - [int64x2_t, int32x2_t]
+    compose:
+      - FnCall:
+          - simd_sub
+          - - a
+            - FnCall: [simd_cast, [b]]
+
+  - name: "vsubw{neon_type[1].noq}"
+    doc: Unsigned Subtract Wide
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubw]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usubw]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x8_t, uint8x8_t]
+      - [uint32x4_t, uint16x4_t]
+      - [uint64x2_t, uint32x2_t]
+    compose:
+      - FnCall:
+          - simd_sub
+          - - a
+            - FnCall: [simd_cast, [b]]
+
+  - name: "vsubl{neon_type[0].noq}"
+    doc: "Signed Subtract Long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubl]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ssubl]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int16x8_t]
+      - [int16x4_t, int32x4_t]
+      - [int32x2_t, int64x2_t]
+    compose:
+      - Let:
+          - c
+          - "{neon_type[1]}"
+          - FnCall: [simd_cast, [a]]
+      - Let:
+          - d
+          - "{neon_type[1]}"
+          - FnCall: [simd_cast, [b]]
+      - FnCall: [simd_sub, [c, d]]
+
+  - name: "vsubl{neon_type[0].noq}"
+    doc: "Unsigned Subtract Long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsubl]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usubl]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, uint16x8_t]
+      - [uint16x4_t, uint32x4_t]
+      - [uint32x2_t, uint64x2_t]
+    compose:
+      - Let:
+          - c
+          - "{neon_type[1]}"
+          - FnCall: [simd_cast, [a]]
+      - Let:
+          - d
+          - "{neon_type[1]}"
+          - FnCall: [simd_cast, [b]]
+      - FnCall: [simd_sub, [c, d]]
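+
+  # vsubw widens only the second operand (wide - widen(narrow)), whereas
+  # vsubl widens both operands before subtracting, so neither form can wrap
+  # in the narrow type. Sketch of the two shapes:
+  #
+  #   vsubw_s8(a /* int16x8_t */, b /* int8x8_t */) // a - widen(b)
+  #   vsubl_s8(a /* int8x8_t  */, b /* int8x8_t */) // widen(a) - widen(b)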
"stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int32x2_t, int8x8_t] + - [int32x4_t, int8x16_t] + compose: + - LLVMLink: + name: "sdot.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.arm.neon.sdot.{neon_type[0]}.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sdot.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vdot{neon_type[0].no}" + doc: Dot product arithmetic (vector) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [uint32x2_t, uint8x8_t] + - [uint32x4_t, uint8x16_t] + compose: + - LLVMLink: + name: "udot.{neon_type[0]}.{neon_type[1]}" + links: + - link: "llvm.arm.neon.udot.{neon_type[0]}.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.udot.{neon_type[0]}.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vdot{neon_type[0].lane_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsdot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]'] + - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - Let: + - c + - "{neon_type[3]}" + - FnCall: [transmute, [c]] + - Let: + - c + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [c, c, '{type[4]}']] + - FnCall: + - "vdot{neon_type[0].no}" + - - a + - b + - FnCall: [transmute, [c]] + + - name: "vdot{neon_type[0].lane_nox}" + doc: Dot product arithmetic (indexed) + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[0]}" + static_defs: ["const LANE: i32"] + attr: + - *neon-v8 + - FnCall: [target_feature, ['enable = "neon,dotprod"']] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]'] + - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, 
+
+  - name: "vdot{neon_type[0].lane_nox}"
+    doc: Dot product arithmetic (indexed)
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    static_defs: ["const LANE: i32"]
+    attr:
+      - *neon-v8
+      - FnCall: [target_feature, ['enable = "neon,dotprod"']]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]]
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
+      - Let:
+          - c
+          - "{neon_type[3]}"
+          - FnCall: [transmute, [c]]
+      - Let:
+          - c
+          - "{neon_type[0]}"
+          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+      - FnCall:
+          - "vdot{neon_type[0].no}"
+          - - a
+            - b
+            - FnCall: [transmute, [c]]
+
+  - name: "vmax{neon_type.no}"
+    doc: Maximum (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smax]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int8x16_t
+      - int16x4_t
+      - int16x8_t
+      - int32x2_t
+      - int32x4_t
+    compose:
+      - LLVMLink:
+          name: "smax.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxs.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.smax.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vmax{neon_type.no}"
+    doc: Maximum (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umax]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - uint8x16_t
+      - uint16x4_t
+      - uint16x8_t
+      - uint32x2_t
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: "smax.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxu.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.umax.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vmax{neon_type.no}"
+    doc: Maximum (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+    compose:
+      - LLVMLink:
+          name: "smax.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxs.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fmax.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vmaxnm{neon_type.no}"
+    doc: Floating-point Maximum Number (vector)
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmaxnm]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+      - float32x4_t
+    compose:
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec
"llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.smin.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umin]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "umin.{neon_type}" + links: + - link: "llvm.arm.neon.vminu.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.umin.{neon_type}" + arch: aarch64,arm64ec + + - name: "vmin{neon_type.no}" + doc: "Minimum (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fmin.{neon_type}" + links: + - link: "llvm.arm.neon.vmins.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fmin.{neon_type}" + arch: aarch64,arm64ec + + - name: "vminnm{neon_type.no}" + doc: "Floating-point Minimum Number (vector)" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp-armv8,v8"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vminnm]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "fminnm.{neon_type}" + links: + - link: "llvm.arm.neon.vminnm.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.fminnm.{neon_type}" + arch: aarch64,arm64ec + + - name: "vpadd{neon_type.no}" + doc: Floating-point add pairwise + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - float32x2_t + compose: + - LLVMLink: + name: "faddp.{neon_type}" + links: + - link: "llvm.arm.neon.vpadd.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.faddp.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqdmull{neon_type[0].noq}" + doc: "Signed saturating doubling multiply long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmull]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmull]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: 
+
+  - name: "vqdmull{neon_type[0].noq}"
+    doc: "Signed saturating doubling multiply long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmull]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmull]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, int32x4_t]
+      - [int32x2_t, int64x2_t]
+    compose:
+      - LLVMLink:
+          name: "vqdmull{neon_type[0].no}"
+          links:
+            - link: "llvm.arm.neon.vqdmull.{neon_type[1]}"
+              arch: arm
+            - link: "llvm.aarch64.neon.sqdmull.{neon_type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vqdmull_n{neon_type[0].no}"
+    doc: "Vector saturating doubling long multiply with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmull]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmull]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, "i16", int32x4_t]
+      - [int32x2_t, "i32", int64x2_t]
+    compose:
+      - FnCall: ["vqdmull{neon_type[0].noq}", [a, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]]
+
+  - name: "vqdmull_lane_s16"
+    doc: "Vector saturating doubling long multiply by scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmull, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, int16x4_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, '2']]
+      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - FnCall: [vqdmull_s16, [a, b]]
+
+  - name: "vqdmull_lane_s32"
+    doc: "Vector saturating doubling long multiply by scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmull, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x2_t, int32x2_t, int64x2_t, '[N as u32, N as u32]']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, '1']]
+      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - FnCall: [vqdmull_s32, [a, b]]
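+
+  # "Saturating doubling" means every wide lane is sat(2 * a * b). The only
+  # inputs that can actually overflow are the most negative values, e.g. for
+  # s16 (sketch):
+  #
+  #   vqdmull_s16(vdup_n_s16(i16::MIN), vdup_n_s16(i16::MIN))
+  #   // 2 * (-32768)^2 == 2^31 -> saturates every lane to i32::MAX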
+
+  - name: "vqdmlal{neon_type[1].noq}"
+    doc: "Signed saturating doubling multiply-add long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlal]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlal]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x4_t, int16x4_t, int16x4_t, int32x4_t]
+      - [int64x2_t, int32x2_t, int32x2_t, int64x2_t]
+    compose:
+      - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull{neon_type[2].noq}", [b, c]]}]]
+
+  - name: "vqdmlal_n{neon_type[1].noq}"
+    doc: "Vector widening saturating doubling multiply accumulate with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlal]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlal]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x4_t, int16x4_t, "i16", int32x4_t]
+      - [int64x2_t, int32x2_t, "i32", int64x2_t]
+    compose:
+      - FnCall: ["vqadd{neon_type[0].no}", [a, {FnCall: ["vqdmull_n{neon_type[1].noq}", [b, c]]}]]
+
+  - name: "vqdmlal_lane_s16"
+    doc: "Vector widening saturating doubling multiply accumulate with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlal, N = 2]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x4_t, int16x4_t, int16x4_t, int32x4_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, '2']]
+      - FnCall: [vqaddq_s32, [a, {FnCall: ["vqdmull_lane_s16::<N>", [b, c]]}]]
+
+  - name: "vqdmlal_lane_s32"
+    doc: "Vector widening saturating doubling multiply accumulate with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlal, N = 1]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int64x2_t, int32x2_t, int32x2_t, int64x2_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, '1']]
+      - FnCall: [vqaddq_s64, [a, {FnCall: ["vqdmull_lane_s32::<N>", [b, c]]}]]
+
+  - name: "vqdmlsl{neon_type[1].noq}"
+    doc: "Signed saturating doubling multiply-subtract long"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlsl]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlsl]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x4_t, int16x4_t, int16x4_t, int32x4_t]
+      - [int64x2_t, int32x2_t, int32x2_t, int64x2_t]
+    compose:
+      - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{neon_type[1].noq}", [b, c]]}]]
+
+  - name: "vqdmlsl{type[4]}"
+    doc: "Vector widening saturating doubling multiply subtract with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlsl]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlsl]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x4_t, int16x4_t, "i16", int32x4_t, '_n_s16']
+      - [int64x2_t, int32x2_t, "i32", int64x2_t, '_n_s32']
+    compose:
+      - FnCall: ["vqsub{neon_type[0].no}", [a, {FnCall: ["vqdmull{type[4]}", [b, c]]}]]
+
+  - name: "vqdmlsl_lane_s16"
+    doc: "Vector widening saturating doubling multiply subtract with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlsl, N = 2]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x4_t, int16x4_t, int16x4_t, int32x4_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, '2']]
+      - FnCall: [vqsubq_s32, [a, {FnCall: ["vqdmull_lane_s16::<N>", [b, c]]}]]
+
+  - name: "vqdmlsl_lane_s32"
+    doc: "Vector widening saturating doubling multiply subtract with scalar"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmlsl, N = 1]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int64x2_t, int32x2_t, int32x2_t, int64x2_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [N, '1']]
+      - FnCall: [vqsubq_s64, [a, {FnCall: ["vqdmull_lane_s32::<N>", [b, c]]}]]
+
+  - name: "vqdmulh{neon_type[0].no}"
+    doc: "Signed saturating doubling multiply returning high half"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmulh]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmulh]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
+      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x4_t, int16x4_t, int16x4_t]
+      - [int16x8_t, int16x8_t, int16x8_t]
+      - [int32x2_t, int32x2_t, int32x2_t]
+      - [int32x4_t, int32x4_t, int32x4_t]
+    compose:
+      - LLVMLink:
+          name: "vqdmulh{neon_type[0].no}"
+          links:
+            - link: "llvm.arm.neon.vqdmulh.{neon_type[0]}"
+              arch: arm
+            - link: "llvm.aarch64.neon.sqdmulh.{neon_type[0]}"
+              arch: aarch64,arm64ec
high with scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmulh]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmulh]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16", int16x4_t, '_n_s16'] + - [int32x2_t, "i32", int32x2_t, '_n_s32'] + - [int16x8_t, "i16", int16x8_t, 'q_n_s16'] + - [int32x4_t, "i32", int32x4_t, 'q_n_s32'] + compose: + - Let: [b, "{neon_type[0]}", {FnCall: ["vdup{type[3]}", [b]]}] + - FnCall: ["vqdmulh{neon_type[0].no}", [a, b]] + + - name: "vqmovn{neon_type[0].noq}" + doc: "Signed saturating extract narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqmovn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqxtn]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t] + - [int32x4_t, int16x4_t] + - [int64x2_t, int32x2_t] + compose: + - LLVMLink: + name: "vqmovn{neon_type[0].noq}" + links: + - link: "llvm.arm.neon.vqmovns.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sqxtn.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqmovun{neon_type[0].noq}" + doc: "Signed saturating extract unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqmovun]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqxtun]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, uint8x8_t] + - [int32x4_t, uint16x4_t] + - [int64x2_t, uint32x2_t] + compose: + - LLVMLink: + name: "vqmovun{neon_type[0].noq}" + links: + - link: "llvm.arm.neon.vqmovnsu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.sqxtun.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqrdmulh{neon_type[0].no}" + doc: "Signed saturating rounding doubling multiply returning high half" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: 
[cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqrdmulh]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqrdmulh]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, int16x4_t, int16x4_t] + - [int16x8_t, int16x8_t, int16x8_t] + - [int32x2_t, int32x2_t, int32x2_t] + - [int32x4_t, int32x4_t, int32x4_t] + compose: + - LLVMLink: + name: "vqrdmulh{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqrdmulh.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.sqrdmulh.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vqrshl{neon_type.no}" + doc: "Signed saturating rounding shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqrshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqrshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vqrshl{neon_type}" + links: + - link: "llvm.arm.neon.vqrshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.sqrshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqrshl{neon_type[0].no}" + doc: "Unsigned saturating rounding shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqrshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [uqrshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vqrshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqrshiftu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.uqrshl.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqrshrn_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right
narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int16x8_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int16x8_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrn{neon_type[0].noq}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqrshiftns.{neon_type[1]}" + arch: arm + - FnCall: ["_vqrshrn_n{neon_type[0].noq}", [a, '{type[3]}']] + + - name: "vqrshrn_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrn{neon_type[0].no}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqrshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqrshrn_n{neon_type[0].noq}", [a, N]] + + - name: "vqrshrun_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqrshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - LLVMLink: + name: "vqrshrun_n{neon_type[0].noq}" + arguments: + - 'a: {neon_type[0]}' + - 'n: {neon_type[0]}' + links: + - link: "llvm.arm.neon.vqrshiftnsu.{neon_type[1]}" + arch: arm + - FnCall: + - "_vqrshrun_n{neon_type[0].noq}" + - - a + - "{type[3]}" + + - name: "vqrshrun_n{neon_type[0].noq}" + doc: "Signed saturating rounded shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, 
[sqrshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqrshrun_n{neon_type[0].noq}" + arguments: + - 'a: {neon_type[0]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.sqrshrun.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqrshrun_n{neon_type[0].noq}", [a, N]] + + - name: "vqshl{neon_type.no}" + doc: "Signed saturating shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vqshl{neon_type}" + links: + - link: "llvm.arm.neon.vqshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.sqshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshl{neon_type[0].N}" + doc: "Signed saturating shift left" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqshl, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, '3'] + - [int8x16_t, '3'] + - [int16x4_t, '4'] + - [int16x8_t, '4'] + - [int32x2_t, '5'] + - [int32x4_t, '5'] + - [int64x1_t, '6'] + - [int64x2_t, '6'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[1]}"]] + - FnCall: + - "vqshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[0].N}", ['N as _']] + + - name: "vqshl{neon_type[0].no}" + doc: "Unsigned saturating shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: 
[assert_instr, [uqshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vqshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vqshiftu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.uqshl.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vqshl{neon_type[0].N}" + doc: "Unsigned saturating shift left" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqshl, N = 2]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, '3', int8x8_t] + - [uint8x16_t, '3', int8x16_t] + - [uint16x4_t, '4', int16x4_t] + - [uint16x8_t, '4', int16x8_t] + - [uint32x2_t, '5', int32x2_t] + - [uint32x4_t, '5', int32x4_t] + - [uint64x1_t, '6', int64x1_t] + - [uint64x2_t, '6', int64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[1]}"]] + - FnCall: + - "vqshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[2].N}", ['N as _']] + + - name: "vqshrn_n{neon_type[0].noq}" + doc: "Signed saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn{neon_type[0].no}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftns.{neon_type[1]}" + arch: arm + - FnCall: ["_vqshrn_n{neon_type[0].noq}", [a, "{type[3]}"]] + + - name: "vqshrn_n{neon_type[0].noq}" + doc: "Signed saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: 
[cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn_n{neon_type[0].noq}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrn_n{neon_type[0].noq}", [a, N]] + + - name: "vqshrn_n_{neon_type[0]}" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }'] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }'] + - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftnu.{neon_type[1]}" + arch: arm + - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", "{type[3]}"]] + + - name: "vqshrn_n_{neon_type[0]}" + doc: "Unsigned saturating shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8'] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16'] + - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrn{neon_type[1].no}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.uqshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", N]] + + - name: "vqshrun_n_{neon_type[0]}" + doc: "Signed saturating shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as 
i16]) }'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrun_n_{neon_type[1]}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftnsu.{neon_type[1]}" + arch: arm + - FnCall: ["_vqshrun_n_{neon_type[1]}", [a, "{type[3]}"]] + + - name: "vqshrun_n_{neon_type[0]}" + doc: "Signed saturating shift right unsigned narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, uint8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, uint16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, uint32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vqshrun_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.sqshrun.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshrun_n_{neon_type[0]}", [a, N]] + + - name: "vrsqrts{neon_type.no}" + doc: "Floating-point reciprocal square root step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrts]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frsqrts]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrsqrts{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrts.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrecpe{neon_type.no}" + doc: "Reciprocal estimate." 
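`vrsqrts` (just above) and `vrecps` (below) are the Newton-Raphson step instructions that pair with the `vrsqrte`/`vrecpe` estimates: `frsqrts` computes `(3 - a*b) / 2` per lane. A hedged sketch of the usual refinement idiom (standard NEON practice, not something this spec emits), using the stable `std::arch::aarch64` intrinsics; the `rsqrt` wrapper name is illustrative:

```
#[cfg(target_arch = "aarch64")]
unsafe fn rsqrt(x: std::arch::aarch64::float32x2_t) -> std::arch::aarch64::float32x2_t {
    use std::arch::aarch64::*;
    let mut e = vrsqrte_f32(x); // rough 1/sqrt(x) estimate
    // Each step refines: e *= (3 - x*e*e) / 2, which is what frsqrts computes.
    e = vmul_f32(e, vrsqrts_f32(vmul_f32(x, e), e));
    e = vmul_f32(e, vrsqrts_f32(vmul_f32(x, e), e));
    e
}
```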
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frecpe]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecpe.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrecps{neon_type.no}" + doc: "Floating-point reciprocal step" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecps]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frecps]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrecps{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecps.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frecps.{neon_type}" + arch: aarch64,arm64ec + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-aes + - *neon-v8 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [poly64x1_t, int32x2_t] + - [poly64x1_t, uint32x2_t] + - [poly64x2_t, int32x4_t] + - [poly64x2_t, uint32x4_t] + - [p128, int64x2_t] + - [p128, uint64x2_t] + - [p128, poly64x2_t] + - [poly8x16_t, p128] + - [p128, int8x16_t] + - [p128, uint8x16_t] + - [p128, poly8x16_t] + - [int32x2_t, poly64x1_t] + - [uint32x2_t, poly64x1_t] + - [int32x4_t, poly64x2_t] + - [uint32x4_t, poly64x2_t] + - [int64x2_t, p128] + - [uint64x2_t, p128] + - [poly64x2_t, p128] + - [poly64x1_t, int16x4_t] + - [poly64x1_t, uint16x4_t] + - [poly64x1_t, poly16x4_t] + - [poly64x2_t, int16x8_t] + - [poly64x2_t, uint16x8_t] + - [poly64x2_t, poly16x8_t] + - [p128, int32x4_t] + - [p128, uint32x4_t] + - [poly16x4_t, poly64x1_t] + - [int16x4_t, poly64x1_t] + - [uint16x4_t, poly64x1_t] + - [poly16x8_t, poly64x2_t] + - [int16x8_t, poly64x2_t] + - [uint16x8_t, poly64x2_t] + - [int32x4_t, p128] + - [uint32x4_t, p128] + - [poly64x1_t, int8x8_t] + - [poly64x1_t, uint8x8_t] + - [poly64x1_t, poly8x8_t] + - [poly64x2_t, int8x16_t] + - [poly64x2_t, uint8x16_t] + - [poly64x2_t, poly8x16_t] + - [p128, int16x8_t] + - 
[p128, uint16x8_t] + - [p128, poly16x8_t] + - [poly8x8_t, poly64x1_t] + - [int8x8_t, poly64x1_t] + - [uint8x8_t, poly64x1_t] + - [poly8x16_t, poly64x2_t] + - [int8x16_t, poly64x2_t] + - [uint8x16_t, poly64x2_t] + - [int16x8_t, p128] + - [uint16x8_t, p128] + - [poly16x8_t, p128] + - [int8x16_t, p128] + - [uint8x16_t, p128] + compose: + - FnCall: [transmute, [a]] + + - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}" + doc: Vector reinterpret cast operation + arguments: ["a: {type[0]}"] + return_type: "{type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [poly8x8_t, int8x8_t] + - [poly16x4_t, int16x4_t] + - [uint16x4_t, int16x4_t] + - [uint32x2_t, int32x2_t] + - [uint64x1_t, int64x1_t] + - [uint8x16_t, int8x16_t] + - [poly8x16_t, int8x16_t] + - [poly16x8_t, int16x8_t] + - [uint16x8_t, int16x8_t] + - [uint32x4_t, int32x4_t] + - [uint64x2_t, int64x2_t] + - [poly8x8_t, uint8x8_t] + - [int8x8_t, uint8x8_t] + - [poly16x4_t, uint16x4_t] + - [int16x4_t, uint16x4_t] + - [int32x2_t, uint32x2_t] + - [int64x1_t, uint64x1_t] + - [poly8x16_t, uint8x16_t] + - [int8x16_t, uint8x16_t] + - [poly16x8_t, uint16x8_t] + - [int16x8_t, uint16x8_t] + - [int32x4_t, uint32x4_t] + - [int64x2_t, uint64x2_t] + - [int8x8_t, poly8x8_t] + - [uint8x8_t, poly8x8_t] + - [int16x4_t, poly16x4_t] + - [uint16x4_t, poly16x4_t] + - [int8x16_t, poly8x16_t] + - [uint8x16_t, poly8x16_t] + - [int16x8_t, poly16x8_t] + - [uint16x8_t, poly16x8_t] + - [int16x4_t, int8x8_t] + - [uint16x4_t, int8x8_t] + - [poly16x4_t, int8x8_t] + - [int32x2_t, int16x4_t] + - [uint32x2_t, int16x4_t] + - [int64x1_t, int32x2_t] + - [uint64x1_t, int32x2_t] + - [int16x8_t, int8x16_t] + - [uint16x8_t, int8x16_t] + - [poly16x8_t, int8x16_t] + - [int32x4_t, int16x8_t] + - [uint32x4_t, int16x8_t] + - [int64x2_t, int32x4_t] + - [uint64x2_t, int32x4_t] + - [poly16x4_t, uint8x8_t] + - [int16x4_t, uint8x8_t] + - [uint16x4_t, uint8x8_t] + - [int32x2_t, uint16x4_t] + - [uint32x2_t, uint16x4_t] + - [int64x1_t, uint32x2_t] + - [uint64x1_t, uint32x2_t] + - [poly16x8_t, uint8x16_t] + - [int16x8_t, uint8x16_t] + - [uint16x8_t, uint8x16_t] + - [int32x4_t, uint16x8_t] + - [uint32x4_t, uint16x8_t] + - [int64x2_t, uint32x4_t] + - [uint64x2_t, uint32x4_t] + - [poly16x4_t, poly8x8_t] + - [int16x4_t, poly8x8_t] + - [uint16x4_t, poly8x8_t] + - [int32x2_t, poly16x4_t] + - [uint32x2_t, poly16x4_t] + - [poly16x8_t, poly8x16_t] + - [int16x8_t, poly8x16_t] + - [uint16x8_t, poly8x16_t] + - [int32x4_t, poly16x8_t] + - [uint32x4_t, poly16x8_t] + - [poly8x8_t, int16x4_t] + - [int8x8_t, int16x4_t] + - [uint8x8_t, int16x4_t] + - [poly16x4_t, int32x2_t] + - [int16x4_t, int32x2_t] + - [uint16x4_t, int32x2_t] + - [int32x2_t, int64x1_t] + - [uint32x2_t, int64x1_t] + - [poly8x16_t, int16x8_t] + - [int8x16_t, int16x8_t] + - [uint8x16_t, int16x8_t] + - [poly16x8_t, int32x4_t] + - [int16x8_t, int32x4_t] + - [uint16x8_t, int32x4_t] + - [int32x4_t, int64x2_t] + - [uint32x4_t, int64x2_t] + - [poly8x8_t, uint16x4_t] + - [int8x8_t, uint16x4_t] + - [uint8x8_t, uint16x4_t] + - [poly16x4_t, uint32x2_t] + - [int16x4_t, uint32x2_t] + - [uint16x4_t, uint32x2_t] + - [int32x2_t, uint64x1_t] + - [uint32x2_t, uint64x1_t] + - [poly8x16_t, uint16x8_t] + - [int8x16_t, uint16x8_t] + - [uint8x16_t, uint16x8_t] + - [poly16x8_t, uint32x4_t] + - [int16x8_t, uint32x4_t] 
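Every pair in this long `types` table lowers to the same `transmute` in the `compose` step that closes the entry; only the nominal lane layout changes, never the bits. For example (stable `std::arch::aarch64` intrinsics; the `demo` wrapper name is illustrative):

```
#[cfg(target_arch = "aarch64")]
unsafe fn demo() {
    use std::arch::aarch64::*;
    let bytes = vdup_n_u8(0x01);             // 8 lanes of 0x01
    let halves = vreinterpret_u16_u8(bytes); // same 64 bits, viewed as 4 u16 lanes
    assert_eq!(vget_lane_u16::<0>(halves), 0x0101);
}
```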
+ - [uint16x8_t, uint32x4_t] + - [int32x4_t, uint64x2_t] + - [uint32x4_t, uint64x2_t] + - [poly8x8_t, poly16x4_t] + - [int8x8_t, poly16x4_t] + - [uint8x8_t, poly16x4_t] + - [poly8x16_t, poly16x8_t] + - [int8x16_t, poly16x8_t] + - [uint8x16_t, poly16x8_t] + - [int32x2_t, int8x8_t] + - [uint32x2_t, int8x8_t] + - [int64x1_t, int16x4_t] + - [uint64x1_t, int16x4_t] + - [int32x4_t, int8x16_t] + - [uint32x4_t, int8x16_t] + - [int64x2_t, int16x8_t] + - [uint64x2_t, int16x8_t] + - [int32x2_t, uint8x8_t] + - [uint32x2_t, uint8x8_t] + - [int64x1_t, uint16x4_t] + - [uint64x1_t, uint16x4_t] + - [int32x4_t, uint8x16_t] + - [uint32x4_t, uint8x16_t] + - [int64x2_t, uint16x8_t] + - [uint64x2_t, uint16x8_t] + - [int32x2_t, poly8x8_t] + - [uint32x2_t, poly8x8_t] + - [int64x1_t, poly16x4_t] + - [uint64x1_t, poly16x4_t] + - [int32x4_t, poly8x16_t] + - [uint32x4_t, poly8x16_t] + - [int64x2_t, poly16x8_t] + - [uint64x2_t, poly16x8_t] + - [poly8x8_t, int32x2_t] + - [int8x8_t, int32x2_t] + - [uint8x8_t, int32x2_t] + - [poly16x4_t, int64x1_t] + - [int16x4_t, int64x1_t] + - [uint16x4_t, int64x1_t] + - [poly8x16_t, int32x4_t] + - [int8x16_t, int32x4_t] + - [uint8x16_t, int32x4_t] + - [poly16x8_t, int64x2_t] + - [int16x8_t, int64x2_t] + - [uint16x8_t, int64x2_t] + - [poly8x8_t, uint32x2_t] + - [int8x8_t, uint32x2_t] + - [uint8x8_t, uint32x2_t] + - [poly16x4_t, uint64x1_t] + - [int16x4_t, uint64x1_t] + - [uint16x4_t, uint64x1_t] + - [poly8x16_t, uint32x4_t] + - [int8x16_t, uint32x4_t] + - [uint8x16_t, uint32x4_t] + - [poly16x8_t, uint64x2_t] + - [int16x8_t, uint64x2_t] + - [uint16x8_t, uint64x2_t] + - [int64x1_t, int8x8_t] + - [uint64x1_t, int8x8_t] + - [int64x1_t, uint8x8_t] + - [uint64x1_t, uint8x8_t] + - [int64x1_t, poly8x8_t] + - [uint64x1_t, poly8x8_t] + - [int64x2_t, int8x16_t] + - [uint64x2_t, int8x16_t] + - [int64x2_t, uint8x16_t] + - [uint64x2_t, uint8x16_t] + - [int64x2_t, poly8x16_t] + - [uint64x2_t, poly8x16_t] + - [poly8x8_t, int64x1_t] + - [int8x8_t, int64x1_t] + - [uint8x8_t, int64x1_t] + - [poly8x8_t, uint64x1_t] + - [int8x8_t, uint64x1_t] + - [uint8x8_t, uint64x1_t] + - [poly8x16_t, int64x2_t] + - [int8x16_t, int64x2_t] + - [uint8x16_t, int64x2_t] + - [poly8x16_t, uint64x2_t] + - [int8x16_t, uint64x2_t] + - [uint8x16_t, uint64x2_t] + - [float32x2_t, int8x8_t] + - [float32x2_t, int16x4_t] + - [float32x2_t, int32x2_t] + - [float32x2_t, int64x1_t] + - [float32x4_t, int8x16_t] + - [float32x4_t, int16x8_t] + - [float32x4_t, int32x4_t] + - [float32x4_t, int64x2_t] + - [float32x2_t, uint8x8_t] + - [float32x2_t, uint16x4_t] + - [float32x2_t, uint32x2_t] + - [float32x2_t, uint64x1_t] + - [float32x4_t, uint8x16_t] + - [float32x4_t, uint16x8_t] + - [float32x4_t, uint32x4_t] + - [float32x4_t, uint64x2_t] + - [float32x2_t, poly8x8_t] + - [float32x2_t, poly16x4_t] + - [float32x4_t, poly8x16_t] + - [float32x4_t, poly16x8_t] + - [float32x4_t, p128] + - [int8x8_t, float32x2_t] + - [int16x4_t, float32x2_t] + - [int32x2_t, float32x2_t] + - [int64x1_t, float32x2_t] + - [int8x16_t, float32x4_t] + - [int16x8_t, float32x4_t] + - [int32x4_t, float32x4_t] + - [int64x2_t, float32x4_t] + - [uint8x8_t, float32x2_t] + - [uint16x4_t, float32x2_t] + - [uint32x2_t, float32x2_t] + - [uint64x1_t, float32x2_t] + - [uint8x16_t, float32x4_t] + - [uint16x8_t, float32x4_t] + - [uint32x4_t, float32x4_t] + - [uint64x2_t, float32x4_t] + - [poly8x8_t, float32x2_t] + - [poly16x4_t, float32x2_t] + - [poly8x16_t, float32x4_t] + - [poly16x8_t, float32x4_t] + - [p128, float32x4_t] + compose: + - FnCall: [transmute, [a]] + + - name: 
"vrshl{neon_type.no}" + doc: "Signed rounding shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [srshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vrshl{neon_type.no}" + links: + - link: "llvm.arm.neon.vrshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.srshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrshl{neon_type[0].no}" + doc: "Unsigned rounding shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [urshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vrshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vrshiftu.{neon_type[0]}" + arch: arm + - link: "llvm.aarch64.neon.urshl.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vrshr{neon_type[0].N}" + doc: "Signed rounding shift right" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrshr, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [srshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - [int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N 
<= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: + - "vrshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[0].N}", ['-N as _']] + + - name: "vrshr{neon_type[0].N}" + doc: "Unsigned rounding shift right" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrshr, N = 2]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [urshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [uint8x16_t, int8x16_t, 'N >= 1 && N <= 8'] + - [uint16x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [uint16x8_t, int16x8_t, 'N >= 1 && N <= 16'] + - [uint32x2_t, int32x2_t, 'N >= 1 && N <= 32'] + - [uint32x4_t, int32x4_t, 'N >= 1 && N <= 32'] + - [uint64x1_t, int64x1_t, 'N >= 1 && N <= 64'] + - [uint64x2_t, int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - FnCall: + - "vrshl{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[1].N}", ['-N as _']] + + - name: "vrshrn_n_{neon_type[0]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8', 'const { int16x8_t([-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16]) }'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16', 'const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) }'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32', 'const { int64x2_t([-N as i64, -N as i64]) }'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: "vrshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vrshiftn.{neon_type[1]}" + arch: arm + - FnCall: ["_vrshrn_n_{neon_type[0]}", [a, "{type[3]}"]] + + - name: "vrshrn_n_{neon_type[0]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - LLVMLink: + name: 
"vrshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.rshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vrshrn_n_{neon_type[0]}", [a, N]] + + - name: "vrshrn_n_{neon_type[0]}" + doc: "Rounding shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrshrn, N = 2]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [rshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', s16] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', s32] + - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', s64] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - FnCall: + - transmute + - - FnCall: + - "vrshrn_n_{type[3]}::" + - - FnCall: [transmute, [a]] + + - name: "vrsra{neon_type[0].N}" + doc: "Signed rounding shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [srsra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - [int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N <= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: + - simd_add + - - a + - FnCall: ["vrshr{neon_type[0].N}::", [b]] + + - name: "vrsubhn_{neon_type[0]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsubhn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [rsubhn]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', 
{FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, int16x8_t, int8x8_t] + - [int32x4_t, int32x4_t, int16x4_t] + - [int64x2_t, int64x2_t, int32x2_t] + compose: + - LLVMLink: + name: "vrsubhn_{neon_type[0]}" + links: + - link: "llvm.arm.neon.vrsubhn.{neon_type[2]}" + arch: arm + - link: "llvm.aarch64.neon.rsubhn.{neon_type[2]}" + arch: aarch64,arm64ec + + - name: "vrsubhn_{neon_type[0]}" + doc: "Rounding subtract returning high narrow" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsubhn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [rsubhn]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint16x8_t, uint8x8_t, s16] + - [uint32x4_t, uint32x4_t, uint16x4_t, s32] + - [uint64x2_t, uint64x2_t, uint32x2_t, s64] + compose: + - FnCall: + - transmute + - - FnCall: + - "vrsubhn_{type[3]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vcreate_{neon_type[1]}" + doc: "Create a vector from a 64-bit pattern" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["u64", int8x8_t] + - ["u64", int16x4_t] + - ["u64", int32x2_t] + - ["u64", int64x1_t] + - ["u64", uint8x8_t] + - ["u64", uint16x4_t] + - ["u64", uint32x2_t] + - ["u64", uint64x1_t] + - ["u64", poly8x8_t] + - ["u64", poly16x4_t] + - ["u64", float32x2_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vcreate_p64" + doc: "Create a vector from a 64-bit pattern" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: +
unsafe: [neon] + types: + - ["u64", poly64x1_t] + compose: + - FnCall: [transmute, [a]] + + - name: "vset{neon_type[1].lane_nox}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, LANE = 0]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i8", int8x8_t, '3'] + - ["i16", int16x4_t, '2'] + - ["i32", int32x2_t, '1'] + - ["u8", uint8x8_t, '3'] + - ["u16", uint16x4_t, '2'] + - ["u32", uint32x2_t, '1'] + - ["p8", poly8x8_t, '3'] + - ["p16", poly16x4_t, '2'] + - ["i8", int8x16_t, '4'] + - ["i16", int16x8_t, '3'] + - ["i32", int32x4_t, '2'] + - ["i64", int64x2_t, '1'] + - ["u8", uint8x16_t, '4'] + - ["u16", uint16x8_t, '3'] + - ["u32", uint32x4_t, '2'] + - ["u64", uint64x2_t, '1'] + - ["p8", poly8x16_t, '4'] + - ["p16", poly16x8_t, '3'] + - ["f32", float32x2_t, '1'] + - ["f32", float32x4_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["i64", int64x1_t, int64x1_t] + - ["u64", uint64x1_t, uint64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vset_lane_{neon_type[0]}" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - 
FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["p64", poly64x1_t, poly64x1_t] + compose: + - FnCall: [static_assert!, ['LANE == 0']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vsetq_lane_p64" + doc: "Insert vector element from another vector element" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - ["p64", poly64x2_t, poly64x2_t] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '1']] + - FnCall: [simd_insert!, [b, 'LANE as u32', a]] + + - name: "vshl{neon_type.no}" + doc: "Signed Shift left" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + - int64x1_t + - int64x2_t + compose: + - LLVMLink: + name: "vshl{neon_type.no}" + links: + - link: "llvm.arm.neon.vshifts.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.sshl.{neon_type}" + arch: aarch64,arm64ec + + - name: "vshl{neon_type[0].no}" + doc: "Unsigned Shift left" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vshl]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ushl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + 
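Unlike the `_n` immediate forms elsewhere in this file, `vshl` reads its per-lane shift count from a signed vector register, and a negative count shifts right; that is the documented behaviour of the `sshl`/`ushl` instructions these entries bind. A small sketch (stable `std::arch::aarch64` intrinsics; the `demo` wrapper name is illustrative):

```
#[cfg(target_arch = "aarch64")]
unsafe fn demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_s8(16);
    let left = vshl_s8(a, vdup_n_s8(2));   // 16 << 2 == 64
    let right = vshl_s8(a, vdup_n_s8(-2)); // negative count: 16 >> 2 == 4
    assert_eq!(vget_lane_s8::<0>(left), 64);
    assert_eq!(vget_lane_s8::<0>(right), 4);
}
```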
safety: + unsafe: [neon] + types: + - [uint8x8_t, int8x8_t] + - [uint8x16_t, int8x16_t] + - [uint16x4_t, int16x4_t] + - [uint16x8_t, int16x8_t] + - [uint32x2_t, int32x2_t] + - [uint32x4_t, int32x4_t] + - [uint64x1_t, int64x1_t] + - [uint64x2_t, int64x2_t] + compose: + - LLVMLink: + name: "vshl{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vshiftu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.ushl.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vshll_n_s8" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshll.s8"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sshll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, int16x8_t, 'N >= 0 && N <= 8'] + compose: + - FnCall: [static_assert!, ["{type[2]}"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_s16, ['N as _']] + + - name: "vshll_n_s16" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshll.s16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sshll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x4_t, int32x4_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 16"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_s32, ['N as _']] + + - name: "vshll_n_s32" + doc: "Signed shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshll.s32"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sshll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] 
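The `vshll_n` entries compose a widening `simd_cast` followed by `simd_shl` against a splatted count, so the shift cannot overflow the doubled lane width. Observable behaviour, as a sketch with the stable `std::arch::aarch64` intrinsics (the `demo` wrapper name is illustrative):

```
#[cfg(target_arch = "aarch64")]
unsafe fn demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_u8(0x80);
    // Widen each u8 lane to u16, then shift: 0x80 << 2 == 0x200.
    let r = vshll_n_u8::<2>(a);
    assert_eq!(vgetq_lane_u16::<0>(r), 0x200);
}
```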
+ types: + - [int32x2_t, int64x2_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 32"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_s64, ['N as _']] + + - name: "vshll_n_u8" + doc: "Unsigned shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshll.u8"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ushll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint16x8_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 8"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_u16, ['N as _']] + + - name: "vshll_n_u16" + doc: "Unsigned shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshll.u16"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ushll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x4_t, uint32x4_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 16"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_u32, ['N as _']] + + - name: "vshll_n_u32" + doc: "Unsigned shift left long" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshll.u32"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ushll, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint32x2_t, uint64x2_t] + compose: + - FnCall: [static_assert!, ["N >= 0 && N <= 32"]] + - FnCall: + - simd_shl + - - FnCall: [simd_cast, [a]] + - FnCall: [vdupq_n_u64, ['N as _']] + + - name: "vshr{neon_type[0].N}" + doc: "Shift right" + arguments: ["a: 
{neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshr.{neon_type[0]}"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sshr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { 7 }', 'else { N };'] + - [int8x16_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { 7 }', 'else { N };'] + - [int16x4_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { 15 }', 'else { N };'] + - [int16x8_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { 15 }', 'else { N };'] + - [int32x2_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { 31 }', 'else { N };'] + - [int32x4_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { 31 }', 'else { N };'] + - [int64x1_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { 63 }', 'else { N };'] + - [int64x2_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { 63 }', 'else { N };'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - Identifier: ["{type[2]}{type[3]}", Symbol] + - FnCall: + - simd_shr + - - a + - FnCall: ["vdup{neon_type[0].N}", ['n as _']] + + - name: "vshr{neon_type[0].N}" + doc: "Shift right" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshr.{neon_type[0]}"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ushr, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { return vdup_n_u8(0); }', 'else { N };'] + - [uint8x16_t, 'N >= 1 && N <= 8', 'let n: i32 = if N == 8 { return vdupq_n_u8(0); }', 'else { N };'] + - [uint16x4_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { return vdup_n_u16(0); }', 'else { N };'] + - [uint16x8_t, 'N >= 1 && N <= 16', 'let n: i32 = if N == 16 { return vdupq_n_u16(0); }', 'else { N };'] + - [uint32x2_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { return vdup_n_u32(0); }', 'else { N };'] + - [uint32x4_t, 'N >= 1 && N <= 32', 'let n: i32 = if N == 32 { return vdupq_n_u32(0); }', 'else { N };'] + - [uint64x1_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { return vdup_n_u64(0); }', 'else { N };'] + - [uint64x2_t, 'N >= 1 && N <= 64', 'let n: i32 = if N == 64 { return vdupq_n_u64(0); }', 'else { N };'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - Identifier: 
['{type[2]}{type[3]}', Symbol] + - FnCall: + - simd_shr + - - a + - FnCall: ["vdup{neon_type[0].N}", ['n as _']] + + - name: "vshrn_n_{neon_type[0]}" + doc: "Shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vshrn{type[2]}"', 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [shrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, '.i16', 'N >= 1 && N <= 8'] + - [uint16x8_t, uint8x8_t, '.i16', 'N >= 1 && N <= 8'] + - [int32x4_t, int16x4_t, '.i32', 'N >= 1 && N <= 16'] + - [uint32x4_t, uint16x4_t, '.i32', 'N >= 1 && N <= 16'] + - [int64x2_t, int32x2_t, '.i64', 'N >= 1 && N <= 32'] + - [uint64x2_t, uint32x2_t, '.i64', 'N >= 1 && N <= 32'] + compose: + - FnCall: [static_assert!, ["{type[3]}"]] + - FnCall: + - simd_cast + - - FnCall: + - simd_shr + - - a + - FnCall: ["vdupq_n_{neon_type[0]}", ['N as _']] + + - name: "vsra{neon_type[0].N}" + doc: "Signed shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ssra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, 'N >= 1 && N <= 8'] + - [int8x16_t, 'N >= 1 && N <= 8'] + - [int16x4_t, 'N >= 1 && N <= 16'] + - [int16x8_t, 'N >= 1 && N <= 16'] + - [int32x2_t, 'N >= 1 && N <= 32'] + - [int32x4_t, 'N >= 1 && N <= 32'] + - [int64x1_t, 'N >= 1 && N <= 64'] + - [int64x2_t, 'N >= 1 && N <= 64'] + compose: + - FnCall: [static_assert!, ["{type[1]}"]] + - FnCall: + - simd_add + - - a + - FnCall: ["vshr{neon_type[0].N}::", [b]] + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [trn]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = 
"1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int8x8_t, int8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int16x4_t, int16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [int8x16_t, int8x16x2_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]', '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [int16x8_t, int16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [int32x4_t, int32x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [uint8x8_t, uint8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint16x4_t, uint16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [uint8x16_t, uint8x16x2_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]', '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [uint16x8_t, uint16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [uint32x4_t, uint32x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [poly8x8_t, poly8x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [poly16x4_t, poly16x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + - [poly8x16_t, poly8x16x2_t, '[0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]', '[1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]'] + - [poly16x8_t, poly16x8x2_t, '[0, 8, 2, 10, 4, 12, 6, 14]', '[1, 9, 3, 11, 5, 13, 7, 15]'] + - [float32x4_t, float32x4x2_t, '[0, 4, 2, 6]', '[1, 5, 3, 7]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + - name: "vtrn{neon_type[0].no}" + doc: "Transpose elements" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [zip]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] + - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] + - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] + compose: + - Let: + - a1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[2]}"]] + - Let: + - b1 + - "{neon_type[0]}" + - FnCall: [simd_shuffle!, [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - Identifier: ['(a1, b1)', Symbol] + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vorr]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int8x16_t, int8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 
23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [int16x8_t, int16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int32x4_t, int32x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [uint8x16_t, uint8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [uint16x8_t, uint16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint32x4_t, uint32x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [poly8x16_t, poly8x16x2_t, '[0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]', '[8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]'] + - [poly16x8_t, poly16x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [float32x4_t, float32x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] + - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] + - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vzip{neon_type[0].no}" + doc: Zip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vzip]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int8x8_t, int8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [int16x4_t, int16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [uint8x8_t, uint8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [uint16x4_t, uint16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + - [poly8x8_t, poly8x8x2_t, '[0, 8, 1, 9, 2, 10, 3, 11]', '[4, 12, 5, 13, 6, 14, 7, 15]'] + - [poly16x4_t, poly16x4x2_t, '[0, 4, 1, 5]', '[2, 6, 3, 7]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vuzp]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [int8x8_t, int8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int16x4_t, int16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - 
[int8x16_t, int8x16x2_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]', '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [int16x8_t, int16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [int32x4_t, int32x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [uint8x8_t, uint8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint16x4_t, uint16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [uint8x16_t, uint8x16x2_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]', '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [uint16x8_t, uint16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [uint32x4_t, uint32x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [poly8x8_t, poly8x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [poly16x4_t, poly16x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + - [poly8x16_t, poly8x16x2_t, '[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]', '[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]'] + - [poly16x8_t, poly16x8x2_t, '[0, 2, 4, 6, 8, 10, 12, 14]', '[1, 3, 5, 7, 9, 11, 13, 15]'] + - [float32x4_t, float32x4x2_t, '[0, 2, 4, 6]', '[1, 3, 5, 7]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vuzp{neon_type[0].no}" + doc: Unzip vectors + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vtrn]]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2x2_t, '[0, 2]', '[1, 3]'] + - [int32x2_t, int32x2x2_t, '[0, 2]', '[1, 3]'] + - [uint32x2_t, uint32x2x2_t, '[0, 2]', '[1, 3]'] + compose: + - Let: + - a0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[2]}"]] + - Let: + - b0 + - "{neon_type[0]}" + - FnCall: ["simd_shuffle!", [a, b, "{type[3]}"]] + - FnCall: + - transmute + - - '(a0, b0)' + + - name: "vabal_{neon_type[1]}" + doc: "Unsigned Absolute difference and Accumulate Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vabal.{type[2]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [uabal]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t, "u8"] + - [uint32x4_t, uint16x4_t, "u16"] + - [uint64x2_t, uint32x2_t, "u32"] + compose: + - Let: [d, "{neon_type[1]}", {FnCall: ["vabd_{type[2]}", [b, c]]}] + - FnCall: [simd_add, [a, {FnCall: [simd_cast, [d]]}]] + + - name: "vabal_{neon_type[1]}" + doc: "Signed Absolute difference and Accumulate Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: 
{neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vabal.{neon_type[1]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sabal]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x8_t, int8x8_t, uint8x8_t] + - [int32x4_t, int16x4_t, uint16x4_t] + - [int64x2_t, int32x2_t, uint32x2_t] + compose: + - Let: [d, "{type[1]}", {FnCall: ["vabd_{neon_type[1]}", [b, c]]}] + - Let: [e, "{type[2]}", {FnCall: ["simd_cast", [d]]}] + - FnCall: [simd_add, [a, {FnCall: [simd_cast, [e]]}]] + + - name: "vqabs{neon_type.no}" + doc: Signed saturating Absolute value + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - *neon-v7 + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vqabs.{neon_type}"']]}]] + - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqabs]]}]] + - *neon-stable-not-arm + - *neon-unstable-is-arm + safety: + unsafe: [neon] + types: + - int8x8_t + - int8x16_t + - int16x4_t + - int16x8_t + - int32x2_t + - int32x4_t + compose: + - LLVMLink: + name: "sqabs.{neon_type}" + links: + - link: "llvm.aarch64.neon.sqabs.{neon_type}" + arch: aarch64,arm64ec + - link: "llvm.arm.neon.vqabs.{neon_type}" + arch: arm + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vst1]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["*mut u8", uint8x8x2_t, int8x8x2_t] + - ["*mut u16", uint16x4x2_t, int16x4x2_t] + - ["*mut u32", uint32x2x2_t, int32x2x2_t] + - ["*mut u64", uint64x1x2_t, int64x1x2_t] + - ["*mut u8", uint8x16x2_t, int8x16x2_t] + - ["*mut u16", uint16x8x2_t, int16x8x2_t] + - ["*mut u32", uint32x4x2_t, int32x4x2_t] + - ["*mut u64", uint64x2x2_t, int64x2x2_t] + - ["*mut u8", uint8x8x3_t, int8x8x3_t] + - ["*mut u16", uint16x4x3_t, int16x4x3_t] + - ["*mut u32", uint32x2x3_t, int32x2x3_t] + - ["*mut u64", uint64x1x3_t, int64x1x3_t] + - ["*mut u8", uint8x16x3_t, int8x16x3_t] + - ["*mut u16", uint16x8x3_t, int16x8x3_t] + - ["*mut u32", uint32x4x3_t, int32x4x3_t] + - ["*mut u64", uint64x2x3_t, int64x2x3_t] + - ["*mut u8", uint8x8x4_t, int8x8x4_t] + - ["*mut u16", uint16x4x4_t, int16x4x4_t] + - ["*mut u32", uint32x2x4_t, int32x2x4_t] + - ["*mut u64", uint64x1x4_t, int64x1x4_t] + - ["*mut u8", uint8x16x4_t, int8x16x4_t] + - ["*mut u16", uint16x8x4_t, int16x8x4_t] 
+ - ["*mut u32", uint32x4x4_t, int32x4x4_t] + - ["*mut u64", uint64x2x4_t, int64x2x4_t] + - ["*mut p8", poly8x8x2_t, int8x8x2_t] + - ["*mut p8", poly8x8x3_t, int8x8x3_t] + - ["*mut p8", poly8x8x4_t, int8x8x4_t] + - ["*mut p8", poly8x16x2_t, int8x16x2_t] + - ["*mut p8", poly8x16x3_t, int8x16x3_t] + - ["*mut p8", poly8x16x4_t, int8x16x4_t] + - ["*mut p16", poly16x4x2_t, int16x4x2_t] + - ["*mut p16", poly16x4x3_t, int16x4x3_t] + - ["*mut p16", poly16x4x4_t, int16x4x4_t] + - ["*mut p16", poly16x8x2_t, int16x8x2_t] + - ["*mut p16", poly16x8x3_t, int16x8x3_t] + - ["*mut p16", poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - "vst1{neon_type[2].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vst1]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["*mut p64", poly64x1x2_t, int64x1x2_t] + compose: + - FnCall: + - "vst1{neon_type[2].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["*mut p64", poly64x1x3_t, int64x1x3_t] + - ["*mut p64", poly64x1x4_t, int64x1x4_t] + - ["*mut p64", poly64x2x2_t, int64x2x2_t] + - ["*mut p64", poly64x2x3_t, int64x2x3_t] + - ["*mut p64", poly64x2x4_t, int64x2x4_t] + compose: + - FnCall: + - "vst1{neon_type[2].no}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + safety: + unsafe: [neon] + types: + - ['*mut f32', float32x2x2_t, float32x2_t] + - ['*mut f32', float32x4x2_t, 
float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f32.{neon_type[2]}" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst1]]}]] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + safety: + unsafe: [neon] + types: + - ['*mut f32', float32x2x3_t, float32x2_t] + - ['*mut f32', float32x4x3_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "ptr: {type[0]}" + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + links: + - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f32.{neon_type[2]}" + arch: arm + - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1', 'b.2']] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*mut f32", float32x2x2_t, float32x2_t] + - ["*mut f32", float32x4x2_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x2.{neon_type[2]}.p0f32" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*mut f32", float32x2x3_t, float32x2_t] + - ["*mut f32", float32x4x3_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.st1x3.{neon_type[2]}.p0f32" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', a]] + + - name: "vst1{neon_type[1].no}" + doc: "Store multiple single-element structures to one, two, three, or four registers" + arguments: ["a: {type[0]}", "b: {neon_type[1]}"] + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*mut f32", float32x2x4_t, float32x2_t] + - ["*mut f32", float32x4x4_t, float32x4_t] + compose: + - LLVMLink: + name: "vst1{neon_type[1].no}" + arguments: + - "a: {neon_type[2]}" + - "b: {neon_type[2]}" + - "c: {neon_type[2]}" + - "d: {neon_type[2]}" + - "ptr: {type[0]}" + links: + - link: 
"llvm.aarch64.neon.st1x4.{neon_type[2]}.p0f32" + arch: aarch64,arm64ec + - FnCall: ["_vst1{neon_type[1].no}", ['b.0', 'b.1', 'b.2', 'b.3', a]] + + - name: "vfms{neon_type.no}" + doc: "Floating-point fused multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vfms]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - Let: [b, "{neon_type}", {FnCall: [simd_neg, [b]]}] + - FnCall: ["vfma{neon_type.no}", [a, b, c]] + + - name: "vmul{neon_type[0].no}" + doc: "Polynomial multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [pmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [poly8x8_t, int8x8_t] + - [poly8x16_t, int8x16_t] + compose: + - LLVMLink: + name: "vmul{neon_type[0].no}" + links: + - link: "llvm.arm.neon.vmulp.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.pmul.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vmls{neon_type.no}" + doc: "Floating-point multiply-subtract from accumulator" + arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - FnCall: [simd_sub, [a, {FnCall: [simd_mul, [b, c]]}]] + + - name: "vcge{neon_type.no}" + doc: "Compare unsigned greater than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, 
{FnCall: [assert_instr, ['"vcge.{neon_type}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmhs]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vcge{neon_type[0].no}" + doc: "Floating-point compare greater than or equal" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmge]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_ge, [a, b]] + + - name: "vclt{neon_type.no}" + doc: "Compare unsigned less than" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.{neon_type}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmhi]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vtst{neon_type[0].no}" + doc: "Unsigned compare bitwise Test bits nonzero" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vtst]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [cmtst]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint8x16_t, u8x16, 'u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint16x4_t, u16x4, 'u16x4::new(0, 0, 0, 0)'] + - [uint16x8_t, u16x8, 'u16x8::new(0, 0, 0, 0, 0, 0, 0, 0)'] + - [uint32x2_t, u32x2, 'u32x2::new(0, 0)'] + - [uint32x4_t, u32x4, 'u32x4::new(0, 0, 0, 0)'] + compose: + - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}] + - Let: [d, "{type[1]}", "{type[2]}"] + - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]] + + - name: "vshl{neon_type[0].N}" + doc: "Shift left" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vshl, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [shl, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, '3'] + - [int8x16_t, '3'] + - [int16x4_t, '4'] + - [int16x8_t, '4'] + - [int32x2_t, '5'] + - [int32x4_t, '5'] + - [uint8x8_t, '3'] + - [uint8x16_t, '3'] + - [uint16x4_t, '4'] + - [uint16x8_t, '4'] + - [uint32x2_t, '5'] + - [uint32x4_t, '5'] + - [int64x1_t, '6'] + - [int64x2_t, '6'] + - [uint64x1_t, '6'] + - [uint64x2_t, '6'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[1]}"]] + - FnCall: + - simd_shl + - - a + - FnCall: ["vdup{neon_type[0].N}", ['N as _']] + + - name: "vsra{neon_type[0].N}" + doc: "Unsigned shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [usra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, '8'] + - [uint8x16_t, '8'] + - [uint16x4_t, '16'] + - [uint16x8_t, '16'] + - [uint32x2_t, '32'] + - [uint32x4_t, '32'] + - [uint64x1_t, '64'] + - [uint64x2_t, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[1]}']] + - FnCall: + - simd_add + - - a + - FnCall: ["vshr{neon_type[0].N}::", [b]] + + - name: "vrsra{neon_type[0].N}" + doc: "Unsigned rounding shift right and accumulate" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsra, 'N = 2']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch 
= "arm64ec"']]}]]}, {FnCall: [assert_instr, [ursra, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint8x8_t, '8'] + - [uint8x16_t, '8'] + - [uint16x4_t, '16'] + - [uint16x8_t, '16'] + - [uint32x2_t, '32'] + - [uint32x4_t, '32'] + - [uint64x1_t, '64'] + - [uint64x2_t, '64'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[1]}']] + - FnCall: + - simd_add + - - a + - FnCall: ["vrshr{neon_type[0].N}::", [b]] + + - name: "vqrshrn_n_{neon_type[0]}" + doc: "Unsigned signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }'] + - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }'] + - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']] + - LLVMLink: + name: "vqrshrn{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqrshiftnu.{neon_type[1]}" + arch: arm + - FnCall: ["_vqrshrn{neon_type[0].N}", ["a.as_signed()", "{type[3]}"]] + + - name: "vqrshrn_n_{neon_type[0]}" + doc: "Unsigned signed saturating rounded shift right narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-not-arm + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t, '8'] + - [uint32x4_t, uint16x4_t, '16'] + - [uint64x2_t, uint32x2_t, '32'] + compose: + - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']] + - LLVMLink: + name: "vqrshrn_n_{neon_type[0]}" + arguments: + - "a: {neon_type[0]}" + - "n: i32" + links: + - link: "llvm.aarch64.neon.uqrshrn.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a.as_signed()", N]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to unsigned fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtzu]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, 
['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.fptoui.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vmla{neon_type[0].N}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.i16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mla]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16", int16x4_t] + - [int16x8_t, "i16", int16x8_t] + - [uint16x4_t, "u16", uint16x4_t] + - [uint16x8_t, "u16", uint16x8_t] + compose: + - FnCall: + - "vmla{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmla{neon_type[0].N}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.i32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mla]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, "i32", int32x2_t] + - [int32x4_t, "i32", int32x4_t] + - [uint32x2_t, "u32", uint32x2_t] + - [uint32x4_t, "u32", uint32x4_t] + compose: + - FnCall: + - "vmla{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmla{neon_type[0].N}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - 
[float32x2_t, "f32", float32x2_t] + - [float32x4_t, "f32", float32x4_t] + compose: + - FnCall: ["vmla{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [c]]}]] + + - name: "vmla{type[0]}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.i16"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mla, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmla{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmla{type[0]}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.i32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mla, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as 
u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmla{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmla{type[0]}" + doc: "Vector multiply accumulate with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmla.f32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmla{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmls{neon_type[0].N}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.i16"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16", int16x4_t] + - [int16x8_t, "i16", int16x8_t] + - [uint16x4_t, "u16", uint16x4_t] + - [uint16x8_t, "u16", uint16x8_t] + compose: + - FnCall: + - "vmls{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmls{neon_type[0].N}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] 
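+  # Like the `vmla` group above, these by-scalar `vmls` entries splat the
+  # scalar with vdup and defer to the vector form. A rough sketch of what one
+  # row generates (not the exact generator output):
+  #
+  #   pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+  #       // multiply-subtract from accumulator: a - b * splat(c)
+  #       vmls_s32(a, b, vdup_n_s32(c))
+  #   }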
+ - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.i32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int32x2_t, "i32", int32x2_t] + - [int32x4_t, "i32", int32x4_t] + - [uint32x2_t, "u32", uint32x2_t] + - [uint32x4_t, "u32", uint32x4_t] + compose: + - FnCall: + - "vmls{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}", [c]] + + - name: "vmls{neon_type[0].N}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, "f32", float32x2_t] + - [float32x4_t, "f32", float32x4_t] + compose: + - FnCall: ["vmls{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [c]]}]] + + - name: "vmls{type[0]}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.i16"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mls, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + 
- [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmls{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmls{type[0]}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.i32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mls, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmls{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmls{type[0]}" + doc: "Vector multiply subtract with scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vmls.f32"', 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['3']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, 
LANE as u32]'] + - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - "vmls{neon_type[1].no}" + - - a + - b + - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]] + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [mul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16"] + - [int16x8_t, "i16"] + - [int32x2_t, "i32"] + - [int32x4_t, "i32"] + - [uint16x4_t, "u16"] + - [uint16x8_t, "u16"] + - [uint32x2_t, "u32"] + - [uint32x4_t, "u32"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{neon_type[0].N}" + doc: "Vector multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, "f32"] + - [float32x4_t, "f32"] + compose: + - FnCall: + - simd_mul + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vmul{type[2]}" + doc: "Floating-point multiply" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmul, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [float32x2_t, float32x2_t, '_lane_f32', '1', '[LANE as u32, LANE as u32]'] + - [float32x2_t, float32x4_t, '_laneq_f32', '2', '[LANE as u32, LANE as u32]'] + - [float32x4_t, float32x2_t, 'q_lane_f32', '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as 
u32]'] + - [float32x4_t, float32x4_t, 'q_laneq_f32', '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] + - FnCall: + - simd_mul + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + + - name: "vqrdmulh{type[0]}" + doc: "Vector rounding saturating doubling multiply high by scalar" + arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqrdmulh, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqrdmulh, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] + - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] + - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - Let: [b, "{neon_type[1]}", {FnCall: [simd_shuffle!, [b, b, '{type[4]}']]}] + - FnCall: ["vqrdmulh{neon_type[1].no}", [a, b]] + + - name: "vqrdmulh{neon_type[0].N}" + doc: "Vector saturating rounding doubling multiply high with scalar" + arguments: ["a: {neon_type[0]}", "b: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqrdmulh]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqrdmulh]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [int16x4_t, "i16"] + - [int16x8_t, "i16"] + - [int32x2_t, "i32"] + - [int32x4_t, "i32"] + compose: + - FnCall: + - "vqrdmulh{neon_type[0].no}" + - - a + - FnCall: ["vdup{neon_type[0].N}", [b]] + + - name: "vclt{neon_type[0].no}" + doc: "Floating-point compare less than" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - 
FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcgt.f32"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcmgt]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, uint32x2_t] + - [float32x4_t, uint32x4_t] + compose: + - FnCall: [simd_lt, [a, b]] + + - name: "vabdl_{neon_type[0]}" + doc: "Unsigned Absolute difference Long" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vabdl.{neon_type[0]}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [uabdl]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint16x8_t] + - [uint16x4_t, uint32x4_t] + - [uint32x2_t, uint64x2_t] + compose: + - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [a, b]]}]] + + - name: "vmull_lane{neon_type[1].no}" + doc: "Vector long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmull, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [smull, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int16x4_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x4_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32]'] + - [int32x2_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + + - name: "vmull_lane{neon_type[1].no}" + doc: "Vector long multiply by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: 
[cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vmull, 'LANE = 1']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [umull, 'LANE = 1']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]'] + - [uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] + - FnCall: + - "vmull_{neon_type[0]}" + - - a + - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]] + + - name: "vfms{neon_type[0].N}" + doc: "Floating-point fused Multiply-subtract to accumulator (vector)" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "vfp4"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vfms]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fmls]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, "f32"] + - [float32x4_t, "f32"] + compose: + - FnCall: + - "vfms{neon_type[0].no}" + - - a + - b + - FnCall: ["vdup{neon_type[0].N}_vfp4", [c]] + + - name: "vqdmulh{neon_type[0].laneq_nox}" + doc: "Vector saturating doubling multiply high by scalar" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqdmulh, 'LANE = 0']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]] + - FnCall: [rustc_legacy_const_generics, ['2']] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + static_defs: ['const LANE: i32'] + safety: + unsafe: [neon] + types: + - [int16x8_t, int16x8_t, '3'] + - [int16x4_t, int16x8_t, '3'] + - [int32x4_t, int32x4_t, '2'] + - [int32x2_t, int32x4_t, '2'] + compose: + - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]] + - FnCall: + - "vqdmulh{neon_type[0].no}" + - - a + - FnCall: + - "vdup{neon_type[0].N}" + - - FnCall: [simd_extract!, [b, 
'LANE as u32']] + + - name: "vrecpe{neon_type.no}" + doc: "Unsigned reciprocal estimate" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrecpe]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [urecpe]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "vrecpe{neon_type.no}" + links: + - link: "llvm.arm.neon.vrecpe.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.urecpe.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrsqrte{neon_type.no}" + doc: "Unsigned reciprocal square root estimate" + arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ursqrte]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - uint32x2_t + - uint32x4_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.ursqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vrsqrte{neon_type.no}" + doc: "Reciprocal square-root estimate." 
+ arguments: ["a: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vrsqrte]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [frsqrte]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - float32x2_t + - float32x4_t + compose: + - LLVMLink: + name: "vrsqrte{neon_type.no}" + links: + - link: "llvm.arm.neon.vrsqrte.{neon_type}" + arch: arm + - link: "llvm.aarch64.neon.frsqrte.{neon_type}" + arch: aarch64,arm64ec + + - name: "vqshlu{neon_type[0].N}" + doc: "Signed saturating shift left unsigned" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - *target-is-arm + - *enable-v7 + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vqshlu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }'] + - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }'] + - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }'] + - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - LLVMLink: + name: "vqshlu{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.arm.neon.vqshiftsu.{neon_type[0]}" + arch: arm + - FnCall: ["_vqshlu{neon_type[0].N}", [a, "{type[3]}"]] + + - name: "vqshlu{neon_type[0].N}" + doc: "Signed saturating shift left unsigned" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]] + - FnCall: [rustc_legacy_const_generics, ['1']] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + static_defs: ['const N: i32'] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }'] + - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }'] + - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N 
as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] + - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }'] + - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }'] + - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }'] + compose: + - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] + - LLVMLink: + name: "vqshlu{neon_type[0].N}" + arguments: + - "a: {neon_type[0]}" + - "n: {neon_type[0]}" + links: + - link: "llvm.aarch64.neon.sqshlu.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ["_vqshlu{neon_type[0].N}", [a, "{type[3]}"]] + + - name: "vcvt{neon_type[1].no}_{neon_type[0]}" + doc: "Floating-point convert to signed fixed-point, rounding toward zero" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vcvt]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [fcvtzs]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [float32x2_t, int32x2_t] + - [float32x4_t, int32x4_t] + compose: + - LLVMLink: + name: "vcvt{neon_type[1].no}_{neon_type[0]}" + links: + - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" + arch: arm + - link: "llvm.fptosi.sat.{neon_type[1]}.{neon_type[0]}" + arch: aarch64,arm64ec + + - name: "vqmovn_{neon_type[0]}" + doc: "Unsigned saturating extract narrow" + arguments: ["a: {neon_type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vqmovn]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [uqxtn]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - [uint16x8_t, uint8x8_t] + - [uint32x4_t, uint16x4_t] + - [uint64x2_t, uint32x2_t] + compose: + - LLVMLink: + name: "vqmovn_{neon_type[1]}" + links: + - link: "llvm.arm.neon.vqmovnu.{neon_type[1]}" + arch: arm + - link: "llvm.aarch64.neon.uqxtn.{neon_type[1]}" + arch: aarch64,arm64ec + + - name: "vcle{neon_type.no}" + doc: "Compare unsigned less than or equal" + arguments: ["a: {neon_type}", "b: {neon_type}"] + return_type: "{neon_type}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vcge.{neon_type}"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = 
"arm64ec"']]}]]}, {FnCall: [assert_instr, [cmhs]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - uint8x8_t + - uint8x16_t + - uint16x4_t + - uint16x8_t + - uint32x2_t + - uint32x4_t + compose: + - FnCall: [simd_le, [a, b]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vld4]]}]] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x4_t, int8x8_t, '1'] + - ["*const i16", int16x4x4_t, int16x4_t, '2'] + - ["*const i32", int32x2x4_t, int32x2_t, '4'] + - ["*const i8", int8x16x4_t, int8x16_t, '1'] + - ["*const i16", int16x8x4_t, int16x8_t, '2'] + - ["*const i32", int32x4x4_t, int32x4_t, '4'] + - ["*const f32", float32x2x4_t, float32x2_t, '4'] + - ["*const f32", float32x4x4_t, float32x4_t, '4'] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.{neon_type[2]}.p0i8" + arch: arm + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as *const i8', "{type[3]}"]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, [{FnCall: [not, ['target_arch = "arm"']]}]] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']] + safety: + unsafe: [neon] + types: + - ["*const i8", int8x8x4_t, int8x8_t, 'p0i8'] + - ["*const i16", int16x4x4_t, int16x4_t, 'p0i16'] + - ["*const i32", int32x2x4_t, int32x2_t, 'p0i32'] + - ["*const i8", int8x16x4_t, int8x16_t, 'p0i8'] + - ["*const i16", int16x8x4_t, int16x8_t, 'p0i16'] + - ["*const i32", int32x4x4_t, int32x4_t, 'p0i32'] + - ["*const i64", int64x1x4_t, int64x1_t, 'p0i64'] + - ["*const f32", float32x2x4_t, float32x2_t, 'p0f32'] + - ["*const f32", float32x4x4_t, float32x4_t, 'p0f32'] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: {type[0]}" + links: + - link: "llvm.aarch64.neon.ld4r.{neon_type[2]}.{type[3]}" + arch: aarch64,arm64ec + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as _']] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg, ['target_arch = "arm"']] + - FnCall: [target_feature, ['enable = "neon,v7"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']] + safety: + unsafe: [neon] + types: + - ["*const i64", int64x1x4_t] + compose: + - LLVMLink: + name: "vld4{neon_type[1].dup_nox}" + arguments: + - "ptr: *const i8" + - "size: i32" + links: + - link: "llvm.arm.neon.vld4dup.v1i64.p0i8" + arch: arm + - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as *const i8', '8']] + + - name: 
"vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [vld4]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["*const u8", uint8x8x4_t, int8x8x4_t] + - ["*const u16", uint16x4x4_t, int16x4x4_t] + - ["*const u32", uint32x2x4_t, int32x2x4_t] + - ["*const u8", uint8x16x4_t, int8x16x4_t] + - ["*const u16", uint16x8x4_t, int16x8x4_t] + - ["*const u32", uint32x4x4_t, int32x4x4_t] + - ["*const p8", poly8x8x4_t, int8x8x4_t] + - ["*const p16", poly16x4x4_t, int16x4x4_t] + - ["*const p8", poly8x16x4_t, int8x16x4_t] + - ["*const p16", poly16x8x4_t, int16x8x4_t] + compose: + - FnCall: + - "transmute" + - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["*const u64", uint64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - "transmute" + - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] + + - name: "vld4{neon_type[1].dup_nox}" + doc: "Load single 4-element structure and replicate to all lanes of four registers" + arguments: ["a: {type[0]}"] + return_type: "{neon_type[1]}" + attr: + - FnCall: [target_feature, ['enable = "neon,aes"']] + - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v8"']]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, [nop]]}]] + - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [ld4r]]}]] + - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]] + - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]] + safety: + unsafe: [neon] + types: + - ["*const p64", poly64x1x4_t, int64x1x4_t] + compose: + - FnCall: + - "transmute" + - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]] + diff --git 
a/crates/stdarch-gen2/src/context.rs b/crates/stdarch-gen2/src/context.rs index 108f7ab706..8405428b7a 100644 --- a/crates/stdarch-gen2/src/context.rs +++ b/crates/stdarch-gen2/src/context.rs @@ -1,6 +1,6 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::{collections::HashMap, usize}; use crate::{ expression::Expression, @@ -71,17 +71,31 @@ impl LocalContext { } pub fn provide_type_wildcard(&self, wildcard: &Wildcard) -> Result<TypeKind> { - let err = || format!("wildcard {{{wildcard}}} not found"); + let err = || { + format!( + "provide_type_wildcard() wildcard {{{wildcard}}} not found for {}", + &self.signature.name.to_string() + ) + }; - let make_neon = |tuple_size| move |ty| TypeKind::make_vector(ty, false, tuple_size); + /* If the type is already a vector then we can just return the vector */ + let make_neon = |tuple_size| { + move |ty| match ty { + TypeKind::Vector(_) => Ok(ty), + _ => TypeKind::make_vector(ty, false, tuple_size), + } + }; let make_sve = |tuple_size| move |ty| TypeKind::make_vector(ty, true, tuple_size); match wildcard { Wildcard::Type(idx) => self.input.typekind(*idx).ok_or_else(err), - Wildcard::NEONType(idx, tuple_size) => self + Wildcard::NEONType(idx, tuple_size, _) => self .input .typekind(*idx) - .ok_or_else(err) + .ok_or_else(|| { + dbg!("{:?}", &self); + err() + }) .and_then(make_neon(*tuple_size)), Wildcard::SVEType(idx, tuple_size) => self .input diff --git a/crates/stdarch-gen2/src/expression.rs b/crates/stdarch-gen2/src/expression.rs index 4434ae276e..8398467958 100644 --- a/crates/stdarch-gen2/src/expression.rs +++ b/crates/stdarch-gen2/src/expression.rs @@ -1,6 +1,6 @@ use itertools::Itertools; use lazy_static::lazy_static; -use proc_macro2::{Literal, TokenStream}; +use proc_macro2::{Literal, Punct, Spacing, TokenStream}; use quote::{format_ident, quote, ToTokens, TokenStreamExt}; use regex::Regex; use serde::de::{self, MapAccess, Visitor}; @@ -48,8 +48,16 @@ impl FnCall { } pub fn is_llvm_link_call(&self, llvm_link_name: &String) -> bool { + self.is_expected_call(llvm_link_name) + } + + pub fn is_target_feature_call(&self) -> bool { + self.is_expected_call("target_feature") + } + + pub fn is_expected_call(&self, fn_call_name: &str) -> bool { if let Expression::Identifier(fn_name, IdentifierType::Symbol) = self.0.as_ref() { - &fn_name.to_string() == llvm_link_name + &fn_name.to_string() == fn_call_name } else { false } @@ -128,6 +136,8 @@ pub enum Expression { SvUndef, /// Multiplication Multiply(Box<Self>, Box<Self>), + /// Xor + Xor(Box<Self>, Box<Self>), /// Converts the specified constant to the specified type's kind ConvertConst(TypeKind, i32), /// Yields the given type in the Rust representation @@ -149,7 +159,7 @@ impl Expression { ex.pre_build(ctx) } Self::CastAs(ex, _) => ex.pre_build(ctx), - Self::Multiply(lhs, rhs) => { + Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.pre_build(ctx)?; rhs.pre_build(ctx) } @@ -223,7 +233,7 @@ impl Expression { ex.build(intrinsic, ctx) } Self::CastAs(ex, _) => ex.build(intrinsic, ctx), - Self::Multiply(lhs, rhs) => { + Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.build(intrinsic, ctx)?; rhs.build(intrinsic, ctx) } @@ -279,7 +289,7 @@ impl Expression { exp.requires_unsafe_wrapper(ctx_fn) } Self::Array(exps) => exps.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)), - Self::Multiply(lhs, rhs) => { + Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.requires_unsafe_wrapper(ctx_fn) || rhs.requires_unsafe_wrapper(ctx_fn) } Self::CastAs(exp, _ty) => 
exp.requires_unsafe_wrapper(ctx_fn), @@ -413,7 +423,26 @@ impl ToTokens for Expression { tokens.append_all(quote! { let #var_ident: #ty = #exp }) } Self::Assign(var_name, exp) => { - let var_ident = format_ident!("{}", var_name); + /* If we are dereferencing a variable to assign a value, + * the 'format_ident!' macro does not accept the asterisk */ + let var_name_str: &str; + + if let Some(ch) = var_name.chars().nth(0) { + /* Manually append the asterisk and split out the rest of + * the variable name */ + if ch == '*' { + tokens.append(Punct::new('*', Spacing::Alone)); + var_name_str = &var_name[1..var_name.len()]; + } else { + var_name_str = var_name.as_str(); + } + } else { + /* Should not be reached, as you cannot have a variable + * without a name */ + panic!("Invalid variable name, must be at least one character") + } + + let var_ident = format_ident!("{}", var_name_str); tokens.append_all(quote! { #var_ident = #exp }) } Self::MacroCall(name, ex) => { @@ -434,7 +463,7 @@ impl ToTokens for Expression { identifier .to_string() .parse::<TokenStream>() - .expect("invalid syntax") + .expect(format!("invalid syntax: {:?}", self).as_str()) .to_tokens(tokens); } Self::IntConstant(n) => tokens.append(Literal::i32_unsuffixed(*n)), @@ -449,6 +478,7 @@ impl ToTokens for Expression { } Self::SvUndef => tokens.append_all(quote! { simd_reinterpret(()) }), Self::Multiply(lhs, rhs) => tokens.append_all(quote! { #lhs * #rhs }), + Self::Xor(lhs, rhs) => tokens.append_all(quote! { #lhs ^ #rhs }), Self::Type(ty) => ty.to_tokens(tokens), _ => unreachable!("{self:?} cannot be converted to tokens."), } }
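The effect of the new `Assign` arm can be modelled without the proc-macro machinery. A minimal sketch, not generator code (`render_assign` is a hypothetical helper that only mirrors the string-level outcome):

// Mirrors the arm above: a leading '*' is emitted as its own punctuation
// token, and only the remainder of the name goes through `format_ident!`.
fn render_assign(var_name: &str, exp: &str) -> String {
    match var_name.strip_prefix('*') {
        Some(rest) => format!("*{rest} = {exp}"),
        None => format!("{var_name} = {exp}"),
    }
}

fn main() {
    // A spec expression may now assign through a dereferenced name.
    assert_eq!(render_assign("*b", "x"), "*b = x");
    assert_eq!(render_assign("b", "x"), "b = x");
}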
diff --git a/crates/stdarch-gen2/src/fn_suffix.rs b/crates/stdarch-gen2/src/fn_suffix.rs new file mode 100644 index 0000000000..9f7827776e --- /dev/null +++ b/crates/stdarch-gen2/src/fn_suffix.rs @@ -0,0 +1,308 @@ +use std::fmt::{self}; + +/* This file acts as a bridge between the old NEON types, which have a fairly + * complex way of picking suffixes, and the new world. If possible it would be + * good to clean this up. At least it is self-contained and the logic is + * simple. */ +use crate::typekinds::{BaseType, BaseTypeKind, TypeKind, VectorType}; +use serde::{Deserialize, Serialize}; + +use std::str::FromStr; + +#[allow(clippy::enum_variant_names)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Deserialize, Serialize)] +pub enum SuffixKind { + Normal, + Base, + NoQ, + NSuffix, + NoQNSuffix, + DupNox, + Dup, + /* Get the number of lanes, or panic if the type has no lanes */ + Lane, + Rot270, + Rot270Lane, + Rot270LaneQ, + Rot180, + Rot180Lane, + Rot180LaneQ, + Rot90, + Rot90Lane, + Rot90LaneQ, + /* Force the type to be unsigned */ + Unsigned, + Tuple, + NoX, + BaseByteSize, + LaneNoX, + LaneQNoX, +} + +pub fn type_to_size(str_type: &str) -> i32 { + match str_type { + "int8x8_t" | "int8x16_t" | "i8" | "s8" | "uint8x8_t" | "uint8x16_t" | "u8" + | "poly8x8_t" | "poly8x16_t" => 8, + "int16x4_t" | "int16x8_t" | "i16" | "s16" | "uint16x4_t" | "uint16x8_t" | "u16" + | "float16x4_t" | "float16x8_t" | "_f16" | "poly16x4_t" | "poly16x8_t" => 16, + "int32x2_t" | "int32x4_t" | "i32" | "s32" | "uint32x2_t" | "uint32x4_t" | "u32" + | "float32x2_t" | "float32x4_t" | "f32" => 32, + "int64x1_t" | "int64x2_t" | "i64" | "s64" | "uint64x1_t" | "uint64x2_t" | "u64" + | "float64x1_t" | "float64x2_t" | "f64" | "poly64x1_t" | "poly64x2_t" | "p64" => 64, + "p128" => 128, + _ => panic!("unknown type: {str_type}"), + } +}
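`type_to_size` maps a type name to its element width in bits; `Constraint::build` in `intrinsic.rs` (further down in this diff) falls back to it when a constraint's maximum is not a plain number, so a range bound can be spelled as a type name. A small usage sketch, assuming the function is in scope:

fn main() {
    // Element widths in bits, e.g. for bounding shift amounts.
    assert_eq!(type_to_size("int8x8_t"), 8);
    assert_eq!(type_to_size("uint32x4_t"), 32);
    assert_eq!(type_to_size("p128"), 128);
    // Any unknown spelling panics with "unknown type: ...".
}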
+ +fn neon_get_base_and_char(ty: &VectorType) -> (u32, char, bool) { + let lanes = ty.lanes(); + match ty.base_type() { + BaseType::Sized(BaseTypeKind::Float, size) => (*size, 'f', *size * lanes == 128), + BaseType::Sized(BaseTypeKind::Int, size) => (*size, 's', *size * lanes == 128), + BaseType::Sized(BaseTypeKind::UInt, size) => (*size, 'u', *size * lanes == 128), + BaseType::Sized(BaseTypeKind::Poly, size) => (*size, 'p', *size * lanes == 128), + _ => panic!("Unhandled {:?}", ty), + } +} + +/* @TODO: the chained enum types can safely be deleted, as we can index the + * types array instead */ +pub fn make_neon_suffix(type_kind: TypeKind, suffix_kind: SuffixKind) -> String { + match type_kind { + TypeKind::Vector(ty) => { + let tuple_size = ty.tuple_size().map_or(0, |t| t.to_int()); + let (base_size, prefix_char, requires_q) = neon_get_base_and_char(&ty); + let lanes = ty.lanes(); + match suffix_kind { + SuffixKind::Normal => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push('_'); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + if tuple_size > 0 { + str_suffix.push_str("_x"); + str_suffix.push_str(tuple_size.to_string().as_str()); + } + return str_suffix; + } + SuffixKind::NSuffix => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push_str("_n_"); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + return str_suffix; + } + + SuffixKind::NoQ => format!("_{}{}", prefix_char, base_size), + SuffixKind::NoQNSuffix => format!("_n{}{}", prefix_char, base_size), + + SuffixKind::Unsigned => { + let t = type_kind.to_string(); + if t.starts_with("u") { + return t; + } + return format!("u{}", t); + } + SuffixKind::Lane => { + if lanes == 0 { + panic!("type {} has no lanes!", type_kind.to_string()) + } else { + format!("{}", lanes) + } + } + SuffixKind::Tuple => { + if tuple_size == 0 { + panic!("type {} has no tuple size!", type_kind.to_string()) + } else { + format!("{}", tuple_size) + } + } + SuffixKind::Base => base_size.to_string(), + SuffixKind::NoX => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push('_'); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + return str_suffix; + } + SuffixKind::Dup => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push('_'); + str_suffix.push_str("dup_"); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + if tuple_size > 0 { + str_suffix.push_str("_x"); + str_suffix.push_str(tuple_size.to_string().as_str()); + } + return str_suffix; + } + SuffixKind::DupNox => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push('_'); + str_suffix.push_str("dup_"); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + return str_suffix; + } + SuffixKind::LaneNoX => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push('_'); + str_suffix.push_str("lane_"); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + return str_suffix; + } + SuffixKind::LaneQNoX => { + let mut str_suffix: String = String::new(); + if requires_q { + str_suffix.push('q'); + } + str_suffix.push('_'); + str_suffix.push_str("laneq_"); + str_suffix.push(prefix_char); + str_suffix.push_str(base_size.to_string().as_str()); + return str_suffix; + } + SuffixKind::Rot270 => { + if requires_q { + return format!("q_rot270_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot270_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot270Lane => { + if requires_q { + return format!("q_rot270_lane_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot270_lane_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot270LaneQ => { + if requires_q { + return format!("q_rot270_laneq_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot270_laneq_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot180 => { + if requires_q { + return format!("q_rot180_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot180_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot180Lane => { + if requires_q { + return format!("q_rot180_lane_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot180_lane_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot180LaneQ => { + if requires_q { + return format!("q_rot180_laneq_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot180_laneq_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot90 => { + if requires_q { + return format!("q_rot90_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot90_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot90Lane => { + if requires_q { + return format!("q_rot90_lane_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot90_lane_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::Rot90LaneQ => { + if requires_q { + return format!("q_rot90_laneq_{}{}", prefix_char, base_size.to_string()); + } + return format!("_rot90_laneq_{}{}", prefix_char, base_size.to_string()); + } + SuffixKind::BaseByteSize => format!("{}", base_size / 8), + } + } + _ => panic!("Can only make suffixes for NEON vector types"), + } +}
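A few concrete outputs of `make_neon_suffix`, worked out from the arms above (a `q` is prepended whenever lanes × element size equals 128 bits); the `parse` calls assume `TypeKind`'s `FromStr` from `typekinds.rs`:

fn main() {
    let u32x2: TypeKind = "uint32x2_t".parse().unwrap();
    assert_eq!(make_neon_suffix(u32x2, SuffixKind::Normal), "_u32"); // 2 x 32 = 64, no q
    let s8x16: TypeKind = "int8x16_t".parse().unwrap();
    assert_eq!(make_neon_suffix(s8x16, SuffixKind::Normal), "q_s8"); // 16 x 8 = 128
    let f32x4x4: TypeKind = "float32x4x4_t".parse().unwrap();
    // DupNox drops the tuple part: this is the "q_dup_f32" of vld4q_dup_f32.
    assert_eq!(make_neon_suffix(f32x4x4, SuffixKind::DupNox), "q_dup_f32");
}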
+ +impl FromStr for SuffixKind { + type Err = String; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "no" => Ok(SuffixKind::Normal), + "noq" => Ok(SuffixKind::NoQ), + "N" => Ok(SuffixKind::NSuffix), + "noq_N" => Ok(SuffixKind::NoQNSuffix), + "dup_nox" => Ok(SuffixKind::DupNox), + "dup" => Ok(SuffixKind::Dup), + "lane" => Ok(SuffixKind::Lane), + "base" => Ok(SuffixKind::Base), + "tuple" => Ok(SuffixKind::Tuple), + "rot270" => Ok(SuffixKind::Rot270), + "rot270_lane" => Ok(SuffixKind::Rot270Lane), + "rot270_laneq" => Ok(SuffixKind::Rot270LaneQ), + "rot90" => Ok(SuffixKind::Rot90), + "rot90_lane" => Ok(SuffixKind::Rot90Lane), + "rot90_laneq" => Ok(SuffixKind::Rot90LaneQ), + "rot180" => Ok(SuffixKind::Rot180), + "rot180_lane" => Ok(SuffixKind::Rot180Lane), + "rot180_laneq" => Ok(SuffixKind::Rot180LaneQ), + "u" => Ok(SuffixKind::Unsigned), + "nox" => Ok(SuffixKind::NoX), + "base_byte_size" => Ok(SuffixKind::BaseByteSize), + "lane_nox" => Ok(SuffixKind::LaneNoX), + "laneq_nox" => Ok(SuffixKind::LaneQNoX), + _ => Err(format!("unknown suffix type: {}", s)), + } + } +} + +impl fmt::Display for SuffixKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SuffixKind::Normal => write!(f, "normal"), + SuffixKind::NoQ => write!(f, "NoQ"), + SuffixKind::NSuffix => write!(f, "NSuffix"), + SuffixKind::NoQNSuffix => write!(f, "NoQNSuffix"), + SuffixKind::DupNox => write!(f, "DupNox"), + SuffixKind::Dup => write!(f, "Dup"), + SuffixKind::Lane => write!(f, "Lane"), + SuffixKind::LaneNoX => write!(f, "LaneNoX"), + SuffixKind::LaneQNoX => write!(f, "LaneQNoX"), + SuffixKind::Base => write!(f, "Base"), + SuffixKind::Rot270 => write!(f, "Rot270"), + SuffixKind::Rot270Lane => write!(f, "Rot270Lane"), + SuffixKind::Rot270LaneQ => write!(f, "Rot270LaneQ"), + SuffixKind::Rot90 => write!(f, "Rot90"), + SuffixKind::Rot90Lane => write!(f, "Rot90Lane"), + SuffixKind::Rot90LaneQ => write!(f, "Rot90LaneQ"), + SuffixKind::Rot180 => write!(f, "Rot180"), + SuffixKind::Rot180Lane => write!(f, "Rot180Lane"), + SuffixKind::Rot180LaneQ => write!(f, "Rot180LaneQ"), + SuffixKind::Unsigned => write!(f, "Unsigned"), + SuffixKind::Tuple => write!(f, "Tuple"), + SuffixKind::NoX => write!(f, "NoX"), + SuffixKind::BaseByteSize => write!(f, "BaseByteSize"), + } + } +}
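The `from_str` spellings above are what the YAML wildcards use: `{neon_type[0].no}` parses the `no` suffix kind, `{neon_type[1].dup_nox}` parses `dup_nox`, and so on. A sketch of the correspondence:

use std::str::FromStr;

fn main() {
    assert_eq!(SuffixKind::from_str("no"), Ok(SuffixKind::Normal));
    assert_eq!(SuffixKind::from_str("N"), Ok(SuffixKind::NSuffix));
    assert_eq!(SuffixKind::from_str("dup_nox"), Ok(SuffixKind::DupNox));
    assert_eq!(SuffixKind::from_str("laneq_nox"), Ok(SuffixKind::LaneQNoX));
}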
diff --git a/crates/stdarch-gen2/src/intrinsic.rs b/crates/stdarch-gen2/src/intrinsic.rs index d05b71e44d..4a973691b1 100644 --- a/crates/stdarch-gen2/src/intrinsic.rs +++ b/crates/stdarch-gen2/src/intrinsic.rs @@ -4,7 +4,8 @@ use quote::{format_ident, quote, ToTokens, TokenStreamExt}; use serde::{Deserialize, Serialize}; use serde_with::{DeserializeFromStr, SerializeDisplay}; use std::collections::{HashMap, HashSet}; -use std::fmt; +use std::fmt::{self}; +use std::num::ParseIntError; use std::ops::RangeInclusive; use std::str::FromStr; @@ -16,6 +17,7 @@ use crate::{ assert_instr::InstructionAssertionMethod, context::{self, ArchitectureSettings, Context, LocalContext, VariableType}, expression::{Expression, FnCall, IdentifierType}, + fn_suffix::{type_to_size, SuffixKind}, input::IntrinsicInput, matching::{KindMatchable, SizeMatchable}, typekinds::*, @@ -238,7 +240,10 @@ impl Constraint { .map_err(|_| format!("the minimum value `{min}` is not a valid number"))?; let max: i32 = max .parse() - .map_err(|_| format!("the maximum value `{max}` is not a valid number"))?; + .or_else(|_| Ok(type_to_size(max.as_str()))) + .map_err(|_: ParseIntError| { + format!("the maximum value `{max}` is not a valid number") + })?; *self = Self::RangeI32 { variable: variable.to_owned(), range: SizeMatchable::Matched(RangeInclusive::new(min, max)), @@ -282,6 +287,9 @@ pub struct Signature { /// Function return type, leave unset for void pub return_type: Option<TypeKind>, + /// For some neon intrinsics we want to modify the suffix of the function name + pub suffix_type: Option<SuffixKind>, + /// List of static definitions, leave unset or empty if not required #[serde(default)] pub static_defs: Vec<StaticDefinition>, @@ -312,7 +320,11 @@ impl Signature { } pub fn build(&mut self, ctx: &LocalContext) -> context::Result { - self.name.build_acle(ctx)?; + if self.name_has_neon_suffix() { + self.name.build_neon_intrinsic_signature(ctx)?; + } else { + self.name.build_acle(ctx)?; + } if let Some(ref mut return_type) = self.return_type { if let Some(w) = return_type.clone().wildcard() { @@ -342,6 +354,20 @@ impl Signature { pub fn doc_name(&self) -> String { self.name.to_string() } + + fn name_has_neon_suffix(&self) -> bool { + for part in self.name.wildcards() { + let has_suffix = match part { + Wildcard::NEONType(_, _, suffix_type) => suffix_type.is_some(), + _ => false, + }; + + if has_suffix { + return true; + } + } + false + } } impl ToTokens for Signature { @@ -380,18 +406,46 @@ impl ToTokens for Signature { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct LLVMLinkAttribute { + /// Either one architecture or a comma separated list of architectures with NO spaces pub arch: String, - pub link: String, + pub link: WildString, } impl ToTokens for LLVMLinkAttribute { fn to_tokens(&self, tokens: &mut TokenStream) { let LLVMLinkAttribute { arch, link } = self; - tokens.append_all(quote! { - #[cfg_attr(target_arch = #arch, link_name = #link)] - }) + let link = link.to_string(); + let archs: Vec<&str> = arch.split(',').collect(); + let arch_len = archs.len(); + + if arch_len == 1 { + tokens.append_all(quote! { + #[cfg_attr(target_arch = #arch, link_name = #link)] + }) + } else { + tokens.append(Punct::new('#', Spacing::Alone)); + tokens.append(Punct::new('[', Spacing::Alone)); + tokens.append_all(quote! { cfg_attr }); + tokens.append(Punct::new('(', Spacing::Alone)); + tokens.append_all(quote! { any }); + tokens.append(Punct::new('(', Spacing::Alone)); + let mut i = 0; + while i < arch_len { + let arch = archs[i].to_string(); + tokens.append_all(quote! { target_arch = #arch }); + if i + 1 != arch_len { + tokens.append(Punct::new(',', Spacing::Alone)); + } + i += 1; + } + tokens.append(Punct::new(')', Spacing::Alone)); + tokens.append(Punct::new(',', Spacing::Alone)); + tokens.append_all(quote! { link_name = #link }); + tokens.append(Punct::new(')', Spacing::Alone)); + tokens.append(Punct::new(']', Spacing::Alone)); + } } }
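For a single architecture the `ToTokens` impl above keeps the old one-arch form; for a comma separated `arch` list it wraps the targets in `any(...)`. Roughly the shape of the emitted attributes inside the generated `extern` block, using link names that appear in the spec earlier in this diff (the declaration name and exact formatting are illustrative):

#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
#[cfg_attr(
    any(target_arch = "aarch64", target_arch = "arm64ec"),
    link_name = "llvm.aarch64.neon.urecpe.v2i32"
)]
fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t;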
@@ -406,10 +460,9 @@ pub struct LLVMLink { /// LLVM link signature return type, leave unset if it inherits from intrinsic's signature pub return_type: Option<TypeKind>, - /// **Internal use only. Do not set.** + /// **This will be set automatically if not set** /// Attribute LLVM links for the function. First element is the architecture it targets, /// second element is the LLVM link itself. - #[serde(skip)] pub links: Option<Vec<LLVMLinkAttribute>>, /// **Internal use only. Do not set.** @@ -453,6 +506,7 @@ impl LLVMLink { .return_type .clone() .or_else(|| ctx.local.signature.return_type.clone()), + suffix_type: None, static_defs: vec![], is_predicate_specific: ctx.local.signature.is_predicate_specific, predicate_needs_conversion: false, @@ -467,16 +521,25 @@ impl LLVMLink { .insert(Wildcard::LLVMLink, sig.fn_name().to_string()); self.signature = Some(Box::new(sig)); - self.links = Some( - ctx.global - .arch_cfgs - .iter() - .map(|cfg| LLVMLinkAttribute { - arch: cfg.arch_name.to_owned(), - link: self.resolve(cfg), - }) - .collect_vec(), - ); + + if let Some(ref mut links) = self.links { + links.iter_mut().for_each(|ele| { + ele.link + .build(&ctx.local, TypeRepr::LLVMMachine) + .expect("Failed to transform to LLVMMachine representation"); + }); + } else { + self.links = Some( + ctx.global + .arch_cfgs + .iter() + .map(|cfg| LLVMLinkAttribute { + arch: cfg.arch_name.to_owned(), + link: self.resolve(cfg).into(), + }) + .collect_vec(), + ); + } Ok(()) } @@ -603,8 +666,8 @@ impl ToTokens for LLVMLink { let signature = self.signature.as_ref().unwrap(); let links = self.links.as_ref().unwrap(); tokens.append_all(quote! { - extern "C" { - #(#links),* + extern "unadjusted" { + #(#links)* #signature; } }) @@ -725,6 +788,7 @@ pub enum UnsafetyComment { Dereference(GovernedBy), UnpredictableOnFault, NonTemporal, + Neon, NoProvenance(String), } @@ -759,6 +823,7 @@ impl fmt::Display for UnsafetyComment { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Custom(s) => s.fmt(f), + Self::Neon => write!(f, "Neon intrinsic unsafe"), Self::Uninitialized => write!( f, "This creates an uninitialized value, and may be unsound (like \ @@ -834,13 +899,15 @@ pub struct Intrinsic { /// behaviour is all unsigned types are converted to signed) #[serde(default)] pub defer_to_signed_only_indices: HashSet<usize>, - pub assert_instr: Vec<InstructionAssertionMethod>, + pub assert_instr: Option<Vec<InstructionAssertionMethod>>, /// Whether we should generate a test for this intrinsic #[serde(default)] pub test: Test, /// Primary base type, used for instruction assertion. #[serde(skip)] pub base_type: Option<BaseType>, + /// Attributes for the function + pub attr: Option<Vec<Expression>>, } impl Intrinsic { @@ -930,6 +997,21 @@ impl Intrinsic { } };
+ if variant.attr.is_none() && variant.assert_instr.is_none() { + panic!("Error: {} is missing both 'attr' and 'assert_instr' fields. You must either manually declare the attributes using the 'attr' field or use 'assert_instr'!", variant.signature.name.to_string()); + } + + if variant.attr.is_some() { + let attr: &Vec<Expression> = &variant.attr.clone().unwrap(); + let mut expanded_attr: Vec<Expression> = Vec::new(); + for idx in 0..attr.len() { + let mut ex = attr[idx].clone(); + ex.build(&variant, &mut ctx)?; + expanded_attr.push(ex); + } + variant.attr = Some(expanded_attr); + } + variant.post_build(&mut ctx)?; if let Some(n_variant_op) = ctx.local.n_variant_op().cloned() { @@ -1238,9 +1320,9 @@ impl Intrinsic { } } - self.assert_instr - .iter_mut() - .try_for_each(|ai| ai.build(ctx))?; + if let Some(ref mut assert_instr) = self.assert_instr { + assert_instr.iter_mut().try_for_each(|ai| ai.build(ctx))?; + } // Prepend constraint assertions self.constraints.iter_mut().try_for_each(|c| c.build(ctx))?; @@ -1347,8 +1429,13 @@ impl Intrinsic { match (from_base_type, to_base_type) { // Use AsSigned for uint -> int (Some(BaseTypeKind::UInt), Some(BaseTypeKind::Int)) => as_signed(ex), + (Some(BaseTypeKind::Int), Some(BaseTypeKind::Int)) => ex, // Use AsUnsigned for int -> uint (Some(BaseTypeKind::Int), Some(BaseTypeKind::UInt)) => as_unsigned(ex), + (Some(BaseTypeKind::Float), Some(BaseTypeKind::Float)) => ex, + (Some(BaseTypeKind::UInt), Some(BaseTypeKind::UInt)) => ex, + (Some(BaseTypeKind::Poly), Some(BaseTypeKind::Poly)) => ex, + (None, None) => ex, _ => unreachable!("unsupported conversion case from {from_base_type:?} to {to_base_type:?} hit"), } @@ -1426,11 +1513,9 @@ impl ToTokens for Intrinsic { if let Some(doc) = &self.doc { let mut doc = vec![doc.to_string()]; - doc.push(String::new()); doc.push(format!("[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/{})", &signature.doc_name())); if safety.has_doc_comments() { - doc.push(String::new()); doc.push("## Safety".to_string()); for comment in safety.doc_comments() { doc.push(format!(" * {comment}")); @@ -1454,14 +1539,43 @@ impl ToTokens for Intrinsic { ); } - tokens.append_all(quote! { - #[inline] - #[target_feature(enable = #target_feature)] - }); + tokens.append_all(quote! { #[inline] }); + + /* If we have manually defined attributes on the YAML block with + * 'attr:' we want to add them */ + if let Some(attr) = &self.attr { + /* Scan to see if we have defined `FnCall: [target_feature, ['<feature>']]` */ + if !has_target_feature_attr(attr) { + /* If not, add the default one that is defined at the top of + * the YAML file. This does mean we scan the attributes vector + * twice, once to see if the `target_feature` attribute exists + * and again to actually append the tokens. We could impose + * that the `target_feature` call has to be the first argument + * of the `attr` block */ + tokens.append_all(quote! { + #[target_feature(enable = #target_feature)] + }); + } - if !self.assert_instr.is_empty() { - InstructionAssertionsForBaseType(&self.assert_instr, &self.base_type.as_ref()) - .to_tokens(tokens) + /* Target feature will get added here */ + let attr_expressions = &mut attr.iter().peekable(); + while let Some(ex) = attr_expressions.next() { + tokens.append(Punct::new('#', Spacing::Alone)); + tokens.append(Punct::new('[', Spacing::Alone)); + ex.to_tokens(tokens); + tokens.append(Punct::new(']', Spacing::Alone)); + } + } else { + tokens.append_all(quote! 
@@ -1238,9 +1320,9 @@ impl Intrinsic {
             }
         }
 
-        self.assert_instr
-            .iter_mut()
-            .try_for_each(|ai| ai.build(ctx))?;
+        if let Some(ref mut assert_instr) = self.assert_instr {
+            assert_instr.iter_mut().try_for_each(|ai| ai.build(ctx))?;
+        }
 
         // Prepend constraint assertions
         self.constraints.iter_mut().try_for_each(|c| c.build(ctx))?;
@@ -1347,8 +1429,13 @@
         match (from_base_type, to_base_type) {
             // Use AsSigned for uint -> int
             (Some(BaseTypeKind::UInt), Some(BaseTypeKind::Int)) => as_signed(ex),
+            (Some(BaseTypeKind::Int), Some(BaseTypeKind::Int)) => ex,
             // Use AsUnsigned for int -> uint
             (Some(BaseTypeKind::Int), Some(BaseTypeKind::UInt)) => as_unsigned(ex),
+            (Some(BaseTypeKind::Float), Some(BaseTypeKind::Float)) => ex,
+            (Some(BaseTypeKind::UInt), Some(BaseTypeKind::UInt)) => ex,
+            (Some(BaseTypeKind::Poly), Some(BaseTypeKind::Poly)) => ex,
+            (None, None) => ex,
             _ => unreachable!("unsupported conversion case from {from_base_type:?} to {to_base_type:?} hit"),
         }
@@ -1426,11 +1513,9 @@ impl ToTokens for Intrinsic {
         if let Some(doc) = &self.doc {
             let mut doc = vec![doc.to_string()];
 
-            doc.push(String::new());
             doc.push(format!("[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/{})", &signature.doc_name()));
 
             if safety.has_doc_comments() {
-                doc.push(String::new());
                 doc.push("## Safety".to_string());
                 for comment in safety.doc_comments() {
                     doc.push(format!(" * {comment}"));
@@ -1454,14 +1539,43 @@
             );
         }
 
-        tokens.append_all(quote! {
-            #[inline]
-            #[target_feature(enable = #target_feature)]
-        });
+        tokens.append_all(quote! { #[inline] });
+
+        /* If we have manually defined attributes on the block of yaml with
+         * 'attr:' we want to add them */
+        if let Some(attr) = &self.attr {
+            /* Scan to see if we have defined `FnCall: [target_feature, ['']]`*/
+            if !has_target_feature_attr(attr) {
+                /* If not add the default one that is defined at the top of
+                 * the yaml file. This does mean we scan the attributes vector
+                 * twice, once to see if the `target_feature` exists and again
+                 * to actually append the tokens. We could impose that the
+                 * `target_feature` call has to be the first argument of the
+                 * `attr` block */
+                tokens.append_all(quote! {
+                    #[target_feature(enable = #target_feature)]
+                });
+            }
 
-        if !self.assert_instr.is_empty() {
-            InstructionAssertionsForBaseType(&self.assert_instr, &self.base_type.as_ref())
-                .to_tokens(tokens)
+            /* Target feature will get added here */
+            let attr_expressions = &mut attr.iter().peekable();
+            while let Some(ex) = attr_expressions.next() {
+                tokens.append(Punct::new('#', Spacing::Alone));
+                tokens.append(Punct::new('[', Spacing::Alone));
+                ex.to_tokens(tokens);
+                tokens.append(Punct::new(']', Spacing::Alone));
+            }
+        } else {
+            tokens.append_all(quote! {
+                #[target_feature(enable = #target_feature)]
+            });
+        }
+
+        if let Some(assert_instr) = &self.assert_instr {
+            if !assert_instr.is_empty() {
+                InstructionAssertionsForBaseType(&assert_instr, &self.base_type.as_ref())
+                    .to_tokens(tokens)
+            }
         }
 
         match &self.visibility {
@@ -1494,5 +1608,17 @@
         }
 
         tokens.append(Punct::new('}', Spacing::Alone));
+        tokens.append(Punct::new('\n', Spacing::Alone));
+        tokens.append(Punct::new('\n', Spacing::Alone));
     }
 }
+
+fn has_target_feature_attr(attrs: &[Expression]) -> bool {
+    attrs.iter().any(|attr| {
+        if let Expression::FnCall(fn_call) = attr {
+            fn_call.is_target_feature_call()
+        } else {
+            false
+        }
+    })
+}
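
// A self-contained mini-model of the scan above (`Expr` is a stand-in for
// the generator's `Expression`, which carries more structure than a plain
// callee name):
#[derive(Debug)]
enum Expr {
    FnCall(String),
    Other(String),
}

impl Expr {
    fn is_target_feature_call(&self) -> bool {
        matches!(self, Expr::FnCall(name) if name == "target_feature")
    }
}

fn has_target_feature(attrs: &[Expr]) -> bool {
    attrs.iter().any(Expr::is_target_feature_call)
}

fn main() {
    let attrs = vec![
        Expr::FnCall("target_feature".to_string()),
        Expr::Other("cfg_attr".to_string()),
    ];
    // A `target_feature` call anywhere in the list suppresses the default
    // `#[target_feature(enable = ...)]` taken from the top of the spec file.
    assert!(has_target_feature(&attrs));
}
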
diff --git a/crates/stdarch-gen2/src/main.rs b/crates/stdarch-gen2/src/main.rs
index 5379d18404..22bf6724b0 100644
--- a/crates/stdarch-gen2/src/main.rs
+++ b/crates/stdarch-gen2/src/main.rs
@@ -3,6 +3,7 @@
 mod assert_instr;
 mod context;
 mod expression;
+mod fn_suffix;
 mod input;
 mod intrinsic;
 mod load_store_tests;
@@ -39,7 +40,9 @@ fn main() -> Result<(), String> {
         .into_iter()
         .map(|(input, filepath, out)| {
             let intrinsics = input.intrinsics.into_iter()
-                .map(|intrinsic| intrinsic.generate_variants(&input.ctx))
+                .map(|intrinsic| {
+                    intrinsic.generate_variants(&input.ctx)
+                })
                 .try_collect()
                 .map(|mut vv: Vec<_>| {
                     vv.sort_by_cached_key(|variants| {
@@ -50,23 +53,26 @@ fn main() -> Result<(), String> {
                     vv.into_iter().flatten().collect_vec()
                 })?;
 
-            let loads = intrinsics.iter()
-                .filter_map(|i| {
-                    if matches!(i.test, Test::Load(..)) {
-                        Some(i.clone())
-                    } else {
-                        None
-                    }
-                }).collect();
-            let stores = intrinsics.iter()
-                .filter_map(|i| {
-                    if matches!(i.test, Test::Store(..)) {
-                        Some(i.clone())
-                    } else {
-                        None
-                    }
-                }).collect();
-            load_store_tests::generate_load_store_tests(loads, stores, out.as_ref().map(|o| make_tests_filepath(&filepath, o)).as_ref())?;
+            if filepath.ends_with("sve.spec.yml") || filepath.ends_with("sve2.spec.yml") {
+                let loads = intrinsics.iter()
+                    .filter_map(|i| {
+                        if matches!(i.test, Test::Load(..)) {
+                            Some(i.clone())
+                        } else {
+                            None
+                        }
+                    }).collect();
+                let stores = intrinsics.iter()
+                    .filter_map(|i| {
+                        if matches!(i.test, Test::Store(..)) {
+                            Some(i.clone())
+                        } else {
+                            None
+                        }
+                    }).collect();
+                load_store_tests::generate_load_store_tests(loads, stores, out.as_ref().map(|o| make_tests_filepath(&filepath, o)).as_ref())?;
+            }
+
             Ok((
                 input::GeneratorInput {
                     intrinsics,
diff --git a/crates/stdarch-gen2/src/typekinds.rs b/crates/stdarch-gen2/src/typekinds.rs
index 71f6297d94..7a4fed85ce 100644
--- a/crates/stdarch-gen2/src/typekinds.rs
+++ b/crates/stdarch-gen2/src/typekinds.rs
@@ -67,6 +67,7 @@ pub struct TypeKindOptions {
     f: bool,
     s: bool,
     u: bool,
+    p: bool,
 }
 
 impl TypeKindOptions {
@@ -75,6 +76,7 @@ impl TypeKindOptions {
             BaseTypeKind::Float => self.f,
             BaseTypeKind::Int => self.s,
             BaseTypeKind::UInt => self.u,
+            BaseTypeKind::Poly => self.p,
             BaseTypeKind::Bool => false,
         }
     }
@@ -90,6 +92,7 @@ impl FromStr for TypeKindOptions {
                 b'f' => result.f = true,
                 b's' => result.s = true,
                 b'u' => result.u = true,
+                b'p' => result.p = true,
                 _ => {
                     return Err(format!("unknown type kind: {}", char::from(kind)));
                 }
@@ -113,6 +116,7 @@ pub enum BaseTypeKind {
     Int,
     UInt,
     Bool,
+    Poly,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -130,6 +134,16 @@ pub enum VectorTupleSize {
     Four,
 }
 
+impl VectorTupleSize {
+    pub fn to_int(&self) -> u32 {
+        match self {
+            Self::Two => 2,
+            Self::Three => 3,
+            Self::Four => 4,
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct VectorType {
     base_type: BaseType,
@@ -181,6 +195,7 @@ impl TypeKind {
         match self {
             Self::Base(ty) => Some(ty),
             Self::Pointer(tk, _) => tk.base(),
+            Self::Vector(ty) => Some(&ty.base_type),
             _ => None,
         }
     }
@@ -366,22 +381,22 @@ impl PartialOrd for TypeKind {
     }
 }
 
+impl From<&TypeKind> for usize {
+    fn from(ty: &TypeKind) -> Self {
+        match ty {
+            TypeKind::Base(_) => 1,
+            TypeKind::Pointer(_, _) => 2,
+            TypeKind::Vector(_) => 3,
+            TypeKind::Custom(_) => 4,
+            TypeKind::Wildcard(_) => 5,
+        }
+    }
+}
+
 impl Ord for TypeKind {
     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
         use std::cmp::Ordering::*;
 
-        impl From<&TypeKind> for usize {
-            fn from(ty: &TypeKind) -> Self {
-                match ty {
-                    TypeKind::Base(_) => 1,
-                    TypeKind::Pointer(_, _) => 2,
-                    TypeKind::Vector(_) => 3,
-                    TypeKind::Custom(_) => 4,
-                    TypeKind::Wildcard(_) => 5,
-                }
-            }
-        }
-
         let self_int: usize = self.into();
         let other_int: usize = other.into();
@@ -466,6 +481,14 @@ impl VectorType {
     pub fn cast_base_type_as(&mut self, ty: BaseType) {
         self.base_type = ty
     }
+
+    pub fn lanes(&self) -> u32 {
+        self.lanes
+    }
+
+    pub fn tuple_size(&self) -> Option<VectorTupleSize> {
+        self.tuple_size
+    }
 }
 
 impl FromStr for VectorType {
@@ -473,7 +496,7 @@ impl FromStr for VectorType {
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         lazy_static! {
-            static ref RE: Regex = Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|float)(?:\d+)?)x(?P<lanes>[0-9])))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap();
+            static ref RE: Regex = Regex::new(r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$").unwrap();
         }
 
         if let Some(c) = RE.captures(s) {
@@ -498,12 +521,13 @@ impl FromStr for VectorType {
                 .transpose()
                 .unwrap();
 
-            Ok(VectorType {
+            let v = Ok(VectorType {
                 base_type,
                 is_scalable: c.name("sv_ty").is_some(),
                 lanes,
                 tuple_size,
-            })
+            });
+            return v;
         } else {
             Err(format!("invalid vector type {s:#?} given"))
         }
@@ -612,6 +636,7 @@ impl FromStr for BaseTypeKind {
             "float" | "f" => Ok(Self::Float),
             "int" | "i" => Ok(Self::Int),
             "uint" | "u" => Ok(Self::UInt),
+            "poly" | "p" => Ok(Self::Poly),
             "bool" | "b" => Ok(Self::Bool),
             _ => Err(format!("no match for {s}")),
         }
@@ -624,9 +649,11 @@ impl ToRepr for BaseTypeKind {
             (TypeRepr::C, Self::Float) => "float",
             (TypeRepr::C, Self::Int) => "int",
             (TypeRepr::C, Self::UInt) => "uint",
+            (TypeRepr::C, Self::Poly) => "poly",
             (TypeRepr::Rust | TypeRepr::LLVMMachine | TypeRepr::ACLENotation, Self::Float) => "f",
             (TypeRepr::Rust, Self::Int) | (TypeRepr::LLVMMachine, Self::Int | Self::UInt) => "i",
             (TypeRepr::Rust | TypeRepr::ACLENotation, Self::UInt) => "u",
+            (TypeRepr::Rust | TypeRepr::LLVMMachine | TypeRepr::ACLENotation, Self::Poly) => "p",
             (TypeRepr::ACLENotation, Self::Int) => "s",
             (TypeRepr::ACLENotation, Self::Bool) => "b",
             (_, Self::Bool) => "bool",
@@ -683,7 +710,6 @@ impl FromStr for BaseType {
             .map(u32::from_str)
             .transpose()
             .unwrap();
-
         match size {
             Some(size) => Ok(Self::Sized(kind, size)),
             None => Ok(Self::Unsized(kind)),
@@ -705,6 +731,7 @@ impl ToRepr for BaseType {
             (Sized(_, size), SizeLiteral) if *size == 16 => "h".to_string(),
             (Sized(_, size), SizeLiteral) if *size == 32 => "w".to_string(),
             (Sized(_, size), SizeLiteral) if *size == 64 => "d".to_string(),
+            (Sized(_, size), SizeLiteral) if *size == 128 => "q".to_string(),
             (_, SizeLiteral) => unreachable!("cannot represent {self:#?} as size literal"),
             (Sized(Float, _) | Unsized(Float), TypeKind) => "f".to_string(),
             (Sized(Int, _) | Unsized(Int), TypeKind) => "s".to_string(),
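
// A standalone check of the widened `VectorType` regex (needs the `regex`
// crate; the pattern is copied from the `+` line above, whose capture-group
// names are restored by hand and are therefore an assumption, although
// `c.name("sv_ty")` in the surrounding code confirms that one):
use regex::Regex;

fn main() {
    let re = Regex::new(
        r"^(?:(?:sv(?P<sv_ty>(?:uint|int|bool|float)(?:\d+)?))|(?:(?P<ty>(?:uint|int|bool|poly|float)(?:\d+)?)x(?P<lanes>(?:\d+)?)))(?:x(?P<tuple_size>2|3|4))?_t$",
    )
    .unwrap();

    // New with this patch: `poly` base types and multi-digit lane counts
    // (the old `[0-9]` lane group could not match "16").
    let c = re.captures("poly8x16_t").unwrap();
    assert_eq!(&c["ty"], "poly8");
    assert_eq!(&c["lanes"], "16");

    // Tuple types still parse via the optional trailing group.
    let c = re.captures("int16x4x2_t").unwrap();
    assert_eq!((&c["ty"], &c["lanes"], &c["tuple_size"]), ("int16", "4", "2"));
}
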
diff --git a/crates/stdarch-gen2/src/wildcards.rs b/crates/stdarch-gen2/src/wildcards.rs
index 9d6194d517..25aa803489 100644
--- a/crates/stdarch-gen2/src/wildcards.rs
+++ b/crates/stdarch-gen2/src/wildcards.rs
@@ -5,6 +5,7 @@ use std::fmt;
 use std::str::FromStr;
 
 use crate::{
+    fn_suffix::SuffixKind,
     predicate_forms::PredicationMask,
     typekinds::{ToRepr, TypeKind, TypeKindOptions, VectorTupleSize},
 };
@@ -13,7 +14,7 @@ use crate::{
 pub enum Wildcard {
     Type(Option<usize>),
     /// NEON type derived by a base type
-    NEONType(Option<usize>, Option<VectorTupleSize>),
+    NEONType(Option<usize>, Option<VectorTupleSize>, Option<SuffixKind>),
     /// SVE type derived by a base type
     SVEType(Option<usize>, Option<VectorTupleSize>),
     /// Integer representation of bitsize
@@ -87,7 +88,14 @@ impl FromStr for Wildcard {
 
         let wildcard = match (wildcard_name, inputset_index, tuple_size, modifiers) {
             ("type", index, None, None) => Ok(Wildcard::Type(index)),
-            ("neon_type", index, tuple, None) => Ok(Wildcard::NEONType(index, tuple)),
+            ("neon_type", index, tuple, modifier) => {
+                if let Some(str_suffix) = modifier {
+                    let suffix_kind = SuffixKind::from_str(str_suffix);
+                    return Ok(Wildcard::NEONType(index, tuple, Some(suffix_kind.unwrap())));
+                } else {
+                    Ok(Wildcard::NEONType(index, tuple, None))
+                }
+            }
             ("sve_type", index, tuple, None) => Ok(Wildcard::SVEType(index, tuple)),
             ("size", index, None, None) => Ok(Wildcard::Size(index)),
             ("size_minus_one", index, None, None) => Ok(Wildcard::SizeMinusOne(index)),
@@ -131,7 +139,7 @@ impl FromStr for Wildcard {
             Ok(wildcard)
         }
     } else {
-        Err(format!("invalid wildcard `{s:#?}`"))
+        Err(format!("## invalid wildcard `{s:#?}`"))
     }
 }
@@ -141,12 +149,22 @@ impl fmt::Display for Wildcard {
         match self {
             Self::Type(None) => write!(f, "type"),
             Self::Type(Some(index)) => write!(f, "type[{index}]"),
-            Self::NEONType(None, None) => write!(f, "neon_type"),
-            Self::NEONType(Some(index), None) => write!(f, "neon_type[{index}]"),
-            Self::NEONType(None, Some(tuple_size)) => write!(f, "neon_type_x{tuple_size}"),
-            Self::NEONType(Some(index), Some(tuple_size)) => {
+            Self::NEONType(None, None, None) => write!(f, "neon_type"),
+            Self::NEONType(None, None, Some(suffix_kind)) => write!(f, "neon_type.{suffix_kind}"),
+            Self::NEONType(Some(index), None, None) => write!(f, "neon_type[{index}]"),
+            Self::NEONType(Some(index), None, Some(suffix_kind)) => {
+                write!(f, "neon_type[{index}].{suffix_kind}")
+            }
+            Self::NEONType(None, Some(tuple_size), Some(suffix_kind)) => {
+                write!(f, "neon_type_x{tuple_size}.{suffix_kind}")
+            }
+            Self::NEONType(None, Some(tuple_size), None) => write!(f, "neon_type_x{tuple_size}"),
+            Self::NEONType(Some(index), Some(tuple_size), None) => {
                 write!(f, "neon_type_x{tuple_size}[{index}]")
             }
+            Self::NEONType(Some(index), Some(tuple_size), Some(suffix_kind)) => {
+                write!(f, "neon_type_x{tuple_size}[{index}].{suffix_kind}")
+            }
             Self::SVEType(None, None) => write!(f, "sve_type"),
             Self::SVEType(Some(index), None) => write!(f, "sve_type[{index}]"),
             Self::SVEType(None, Some(tuple_size)) => write!(f, "sve_type_x{tuple_size}"),
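
// The `FromStr`/`Display` pair above is meant to round-trip, now including
// the suffix modifier in the new third field; a comment sketch, using only
// the arms shown in this hunk:
//
//     let w: Wildcard = "neon_type_x2[0]".parse().unwrap();
//     assert!(matches!(w, Wildcard::NEONType(Some(0), Some(_), None)));
//     assert_eq!(w.to_string(), "neon_type_x2[0]");
//
// A modifier is written after a dot, as in `neon_type[0].<kind>`, where
// `<kind>` is a placeholder for one of the `SuffixKind` spellings defined in
// the new `fn_suffix` module (not shown in this diff); it parses into the
// `Some(SuffixKind)` field and is printed back by the matching Display arm.
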
diff --git a/crates/stdarch-gen2/src/wildstring.rs b/crates/stdarch-gen2/src/wildstring.rs
index 1f9e6c9ada..2eb467b962 100644
--- a/crates/stdarch-gen2/src/wildstring.rs
+++ b/crates/stdarch-gen2/src/wildstring.rs
@@ -6,6 +6,7 @@ use std::str::pattern::Pattern;
 use std::{fmt, str::FromStr};
 
 use crate::context::LocalContext;
+use crate::fn_suffix::make_neon_suffix;
 use crate::typekinds::{ToRepr, TypeRepr};
 use crate::wildcards::Wildcard;
 
@@ -17,7 +18,7 @@ pub enum WildStringPart {
 
 /// Wildcard-able string
 #[derive(Debug, Clone, PartialEq, Eq, Default, SerializeDisplay, DeserializeFromStr)]
-pub struct WildString(Vec<WildStringPart>);
+pub struct WildString(pub Vec<WildStringPart>);
 
 impl WildString {
     pub fn has_wildcards(&self) -> bool {
@@ -67,7 +68,7 @@ impl WildString {
 
     pub fn replace<'a, P>(&'a self, from: P, to: &str) -> WildString
     where
-        P: Pattern<'a> + Copy,
+        P: Pattern + Copy,
     {
         WildString(
             self.0
@@ -84,17 +85,62 @@ impl WildString {
         self.build(ctx, TypeRepr::ACLENotation)
     }
 
-    pub fn build(&mut self, ctx: &LocalContext, repr: TypeRepr) -> Result<(), String> {
+    pub fn build_neon_intrinsic_signature(&mut self, ctx: &LocalContext) -> Result<(), String> {
+        let repr = TypeRepr::ACLENotation;
         self.iter_mut().try_for_each(|wp| -> Result<(), String> {
             if let WildStringPart::Wildcard(w) = wp {
-                let value = ctx
-                    .provide_substitution_wildcard(w)
-                    .or_else(|_| ctx.provide_type_wildcard(w).map(|ty| ty.repr(repr)))?;
-                *wp = WildStringPart::String(value);
+                match w {
+                    Wildcard::NEONType(_, _, ref maybe_suffix_kind) => {
+                        if let Some(suffix_kind) = maybe_suffix_kind {
+                            let x = ctx.provide_type_wildcard(w).unwrap();
+                            *wp = WildStringPart::String(make_neon_suffix(x, *suffix_kind))
+                        } else {
+                            *wp = WildString::make_default_build(ctx, repr, w)
+                        }
+                    }
+                    _ => *wp = WildString::make_default_build(ctx, repr, w),
+                }
             }
             Ok(())
         })
     }
+
+    pub fn build(&mut self, ctx: &LocalContext, repr: TypeRepr) -> Result<(), String> {
+        match repr {
+            TypeRepr::ACLENotation | TypeRepr::LLVMMachine => {
+                self.iter_mut().try_for_each(|wp| -> Result<(), String> {
+                    if let WildStringPart::Wildcard(w) = wp {
+                        match w {
+                            Wildcard::NEONType(_, _, ref maybe_suffix_kind) => {
+                                if let Some(suffix_kind) = maybe_suffix_kind {
+                                    let x = ctx.provide_type_wildcard(w).unwrap();
+                                    *wp = WildStringPart::String(make_neon_suffix(x, *suffix_kind))
+                                } else {
+                                    *wp = WildString::make_default_build(ctx, repr, w)
+                                }
+                            }
+                            _ => *wp = WildString::make_default_build(ctx, repr, w),
+                        }
+                    }
+                    Ok(())
+                })
+            }
+            _ => self.iter_mut().try_for_each(|wp| -> Result<(), String> {
+                if let WildStringPart::Wildcard(w) = wp {
+                    *wp = WildString::make_default_build(ctx, repr, w);
+                }
+                Ok(())
+            }),
+        }
+    }
+
+    fn make_default_build(ctx: &LocalContext, repr: TypeRepr, w: &mut Wildcard) -> WildStringPart {
+        WildStringPart::String(
+            ctx.provide_substitution_wildcard(w)
+                .or_else(|_| ctx.provide_type_wildcard(w).map(|ty| ty.repr(repr)))
+                .unwrap(),
+        )
+    }
 }
 
 impl From<String> for WildString {